This class implements a {@link MergePolicy} that tries to merge segments into levels of exponentially increasing size, where each level has fewer than mergeFactor segments in it. Whenever a given level holds mergeFactor segments or more, they are merged. For example, with mergeFactor = 10, ten segments on the smallest level are merged into one segment on the next level, ten of those into one on the level above that, and so on.

This class is abstract and requires a subclass to define the {@link #size} method, which specifies how a segment's size is determined. {@link LogDocMergePolicy} is one subclass that measures size by document count in the segment. {@link LogByteSizeMergePolicy} is another subclass that measures size as the total byte size of the segment's file(s).

Inheritance: MergePolicy
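
Before the harvested test examples, a minimal stand-alone sketch of how a LogMergePolicy subclass is typically configured. This is an illustration assuming the Lucene.NET 4.8 public API; LuceneVersion.LUCENE_48, StandardAnalyzer, the 512 MB cap, and the "index" path are arbitrary choices for demonstration, not taken from the examples below.

        using Lucene.Net.Analysis.Standard;
        using Lucene.Net.Index;
        using Lucene.Net.Store;
        using Lucene.Net.Util;

        // LogMergePolicy is abstract, so pick a concrete subclass:
        // LogDocMergePolicy levels segments by document count,
        // LogByteSizeMergePolicy by the total byte size of their files.
        LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();

        lmp.MergeFactor = 10;    // merge once a level accumulates 10 segments
        lmp.MaxMergeMB  = 512.0; // illustrative cap: larger segments are not considered for merging

        IndexWriterConfig conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48)).SetMergePolicy(lmp);

        using Directory   dir    = FSDirectory.Open("index");
        using IndexWriter writer = new IndexWriter(dir, conf);
        // AddDocument calls from here on are merged according to the levelled schedule above.

Note that the tests below obtain their policy through the NewLogMergePolicy() helper from the Lucene.NET test framework rather than constructing one directly.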
Example #1
        public virtual void TestNoNrmFile()
        {
            Directory      ram      = NewDirectory();
            Analyzer       analyzer = new MockAnalyzer(Random);
            IndexWriter    writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy lmp      = (LogMergePolicy)writer.Config.MergePolicy;

            lmp.MergeFactor = 2;
            lmp.NoCFSRatio  = 0.0;
            Document d = new Document();

            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            Field f1 = NewField("f1", "this field has no norms", customType);

            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            writer.Commit();

            AssertNoNrm(ram);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            AssertNoNrm(ram);
            ram.Dispose();
        }
Example #2
        public void TestExceptionOnBackgroundThreadIsPropagatedToCallingThread()
        {
            using MockDirectoryWrapper dir = NewMockDirectory();
            dir.FailOn(new FailOnlyOnMerge());

            Document doc     = new Document();
            Field    idField = NewStringField("id", "", Field.Store.YES);

            doc.Add(idField);

            var mergeScheduler = new ConcurrentMergeScheduler();

            using IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(mergeScheduler).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy logMP = (LogMergePolicy)writer.Config.MergePolicy;

            logMP.MergeFactor = 10;
            for (int i = 0; i < 20; i++)
            {
                writer.AddDocument(doc);
            }

            bool exceptionHit = false;

            try
            {
                mergeScheduler.Sync();
            }
            catch (MergePolicy.MergeException)
            {
                exceptionHit = true;
            }

            assertTrue(exceptionHit);
        }
Example #3
        public void TestSubclassTaskMergeScheduler()
        {
            MockDirectoryWrapper dir = NewMockDirectory();

            dir.FailOn(new FailOnlyOnMerge());

            Document doc     = new Document();
            Field    idField = NewStringField("id", "", Field.Store.YES);

            doc.Add(idField);

            IndexWriter    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(new MyMergeScheduler(this)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy logMP  = (LogMergePolicy)writer.Config.MergePolicy;

            logMP.MergeFactor = 10;
            for (int i = 0; i < 20; i++)
            {
                writer.AddDocument(doc);
            }

            ((MyMergeScheduler)writer.Config.MergeScheduler).Sync();
            writer.Dispose();

            assertTrue(mergeCalled);
            dir.Dispose();
        }
Example #4
        public virtual DocsAndWriter IndexRandomIWReader(int nThreads, int iterations, int range, Directory dir)
        {
            IDictionary <string, Document> docs = new Dictionary <string, Document>();
            IndexWriter w = RandomIndexWriter.MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetRAMBufferSizeMB(0.1).SetMaxBufferedDocs(maxBufferedDocs).SetMergePolicy(NewLogMergePolicy()), new YieldTestPoint(this));

            w.Commit();
            LogMergePolicy lmp = (LogMergePolicy)w.Config.MergePolicy;

            lmp.NoCFSRatio  = 0.0;
            lmp.MergeFactor = mergeFactor;

            // Commented-out settings carried over from the original Java test:
            //    w.setMaxMergeDocs(Integer.MAX_VALUE);
            //    w.setMaxFieldLength(10000);
            //    w.SetRAMBufferSizeMB(1);
            //    w.setMergeFactor(10);

            threads = new IndexingThread[nThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                IndexingThread th = new IndexingThread(this);
                th.w          = w;
                th.@base      = 1000000 * i;
                th.range      = range;
                th.iterations = iterations;
                threads[i]    = th;
            }

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i].Start();
            }
            for (int i = 0; i < threads.Length; i++)
            {
                threads[i].Join();
            }

            // w.ForceMerge(1);
            //w.Dispose();

            for (int i = 0; i < threads.Length; i++)
            {
                IndexingThread th = threads[i];
                lock (th)
                {
                    docs.PutAll(th.docs);
                }
            }

            TestUtil.CheckIndex(dir);
            DocsAndWriter dw = new DocsAndWriter();

            dw.docs   = docs;
            dw.writer = w;
            return(dw);
        }
Example #5
        public virtual IDictionary <string, Document> IndexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates, bool doReaderPooling)
        {
            IDictionary <string, Document> docs = new Dictionary <string, Document>();
            IndexWriter    w   = RandomIndexWriter.MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetRAMBufferSizeMB(0.1).SetMaxBufferedDocs(maxBufferedDocs).SetIndexerThreadPool(new DocumentsWriterPerThreadPool(maxThreadStates)).SetReaderPooling(doReaderPooling).SetMergePolicy(NewLogMergePolicy()), new YieldTestPoint(this));
            LogMergePolicy lmp = (LogMergePolicy)w.Config.MergePolicy;

            lmp.NoCFSRatio  = 0.0;
            lmp.MergeFactor = mergeFactor;

            threads = new IndexingThread[nThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                IndexingThread th = new IndexingThread(this);
                th.w          = w;
                th.@base      = 1000000 * i;
                th.range      = range;
                th.iterations = iterations;
                threads[i]    = th;
            }

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i].Start();
            }
            for (int i = 0; i < threads.Length; i++)
            {
                threads[i].Join();
            }

            //w.ForceMerge(1);
            w.Dispose();

            for (int i = 0; i < threads.Length; i++)
            {
                IndexingThread th = threads[i];
                UninterruptableMonitor.Enter(th);
                try
                {
                    docs.PutAll(th.docs);
                }
                finally
                {
                    UninterruptableMonitor.Exit(th);
                }
            }

            //System.out.println("TEST: checkindex");
            TestUtil.CheckIndex(dir);

            return(docs);
        }
Example #6
        public virtual void TestNoPrxFile()
        {
            Directory      ram      = NewDirectory();
            Analyzer       analyzer = new MockAnalyzer(Random);
            IndexWriter    writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy lmp      = (LogMergePolicy)writer.Config.MergePolicy;

            lmp.MergeFactor = 2;
            lmp.NoCFSRatio  = 0.0;
            Document d = new Document();

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            Field f1 = NewField("f1", "this field has term freqs", ft);

            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            writer.Commit();

            AssertNoPrx(ram);

            // now add some documents with positions, and check there is no prox after optimization
            d  = new Document();
            f1 = NewTextField("f1", "this field has positions", Field.Store.NO);
            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            AssertNoPrx(ram);
            ram.Dispose();
        }
Example #7
        public virtual void TestSetMaxMergeDocs()
        {
            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(new MyMergeScheduler(this)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy());
            LogMergePolicy    lmp  = (LogMergePolicy)conf.MergePolicy;

            lmp.MaxMergeDocs = 20;
            lmp.MergeFactor  = 2;
            IndexWriter iw       = new IndexWriter(dir, conf);
            Document    document = new Document();

            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.StoreTermVectors = true;

            document.Add(NewField("tvtest", "a b c", customType));
            for (int i = 0; i < 177; i++)
            {
                iw.AddDocument(document);
            }
            iw.Dispose();
            dir.Dispose();
        }
Example #8
        public virtual void TestManyReopensAndFields()
        {
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            LogMergePolicy    lmp    = NewLogMergePolicy();

            lmp.MergeFactor = 3; // merge often
            conf.SetMergePolicy(lmp);
            IndexWriter writer = new IndexWriter(dir, conf);

            bool            isNRT = random.NextBoolean();
            DirectoryReader reader;

            if (isNRT)
            {
                reader = DirectoryReader.Open(writer, true);
            }
            else
            {
                writer.Commit();
                reader = DirectoryReader.Open(dir);
            }

            int numFields    = random.Next(4) + 3;             // 3-6
            int numNDVFields = random.Next(numFields / 2) + 1; // 1-3

            long[] fieldValues   = new long[numFields];
            bool[] fieldHasValue = new bool[numFields];
            Arrays.Fill(fieldHasValue, true);
            for (int i = 0; i < fieldValues.Length; i++)
            {
                fieldValues[i] = 1;
            }

            int numRounds = AtLeast(15);
            int docID     = 0;

            for (int i = 0; i < numRounds; i++)
            {
                int numDocs = AtLeast(5);
                //      System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
                for (int j = 0; j < numDocs; j++)
                {
                    Document doc = new Document();
                    doc.Add(new StringField("id", "doc-" + docID, Store.NO));
                    doc.Add(new StringField("key", "all", Store.NO)); // update key
                    // add all fields with their current value
                    for (int f = 0; f < fieldValues.Length; f++)
                    {
                        if (f < numNDVFields)
                        {
                            doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                        }
                        else
                        {
                            doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                        }
                    }
                    writer.AddDocument(doc);
                    ++docID;
                }

                // if field's value was unset before, unset it from all new added documents too
                for (int field = 0; field < fieldHasValue.Length; field++)
                {
                    if (!fieldHasValue[field])
                    {
                        if (field < numNDVFields)
                        {
                            writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                        }
                        else
                        {
                            writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                        }
                    }
                }

                int    fieldIdx    = random.Next(fieldValues.Length);
                string updateField = "f" + fieldIdx;
                if (random.NextBoolean())
                {
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
                    fieldHasValue[fieldIdx] = false;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
                    }
                }
                else
                {
                    fieldHasValue[fieldIdx] = true;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
                    }
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
                }

                if (random.NextDouble() < 0.2)
                {
                    int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
                    writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    //        System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string           f             = "f" + field;
                        BinaryDocValues  bdv           = r.GetBinaryDocValues(f);
                        NumericDocValues ndv           = r.GetNumericDocValues(f);
                        IBits            docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs is null || liveDocs.Get(doc))
                            {
                                //              System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                //      System.out.println();
            }

            IOUtils.Dispose(writer, reader, dir);
        }