SetRAMBufferSizeMB() Public Method

public SetRAMBufferSizeMB ( double ramBufferSizeMB ) : IndexWriterConfig
Parameter: ramBufferSizeMB (double)
Returns: IndexWriterConfig
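A minimal usage sketch, not taken from the examples below: the setter returns the config itself, so it can be chained. LuceneVersion.LUCENE_48, StandardAnalyzer, and the directory variable are illustrative assumptions, not part of this page's examples.

        // Hypothetical setup: buffer up to 48 MB of documents in RAM before
        // flushing a segment; version, analyzer, and directory are placeholders.
        IndexWriterConfig config = new IndexWriterConfig(
                LuceneVersion.LUCENE_48,
                new StandardAnalyzer(LuceneVersion.LUCENE_48))
            .SetRAMBufferSizeMB(48.0);
        using (IndexWriter writer = new IndexWriter(directory, config))
        {
            // writer.AddDocument(...) calls go here; a flush happens
            // automatically once the buffered documents exceed ~48 MB
        }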
Example #1
        private IndexWriterConfig NewWriterConfig()
        {
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);

            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            // prevent any merges by default.
            conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            return conf;
        }
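In this helper, SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) turns off the document-count flush trigger, so the RAM buffer size set right after it is the only automatic flush criterion left, and NoMergePolicy keeps the flushed segments from being merged afterwards.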
Example #2
        [Slow] // LUCENENET: occasionally
        public virtual void TestStallControl()
        {
            // LUCENENET specific - disable the test if asserts are not enabled
            AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled);

            int[] numThreads          = new int[] { 4 + Random.Next(8), 1 };
            int   numDocumentsToIndex = 50 + Random.Next(50);

            for (int i = 0; i < numThreads.Length; i++)
            {
                AtomicInt32          numDocs = new AtomicInt32(numDocumentsToIndex);
                MockDirectoryWrapper dir     = NewMockDirectory();
                // mock a very slow harddisk sometimes here so that flushing is very slow
                dir.Throttling = Throttling.SOMETIMES;
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
                iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
                iwc.SetFlushPolicy(flushPolicy);

                DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(numThreads[i] == 1 ? 1 : 2);
                iwc.SetIndexerThreadPool(threadPool);
                // with such a small RAM buffer we should be stalled quite quickly
                iwc.SetRAMBufferSizeMB(0.25);
                IndexWriter   writer  = new IndexWriter(dir, iwc);
                IndexThread[] threads = new IndexThread[numThreads[i]];
                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x] = new IndexThread(numDocs, writer, lineDocFile, false);
                    threads[x].Start();
                }

                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x].Join();
                }
                DocumentsWriter docsWriter = writer.DocsWriter;
                Assert.IsNotNull(docsWriter);
                DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
                Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due");
                Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
                Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
                if (numThreads[i] == 1)
                {
                    assertFalse("single thread must not block numThreads: " + numThreads[i], docsWriter.flushControl.stallControl.HasBlocked);
                }
                if (docsWriter.flushControl.peakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d))
                {
                    Assert.IsTrue(docsWriter.flushControl.stallControl.WasStalled);
                }
                AssertActiveBytesAfter(flushControl);
                writer.Dispose(true);
                dir.Dispose();
            }
        }
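This test shrinks the RAM buffer to 0.25 MB so that the indexing threads fill it faster than the (sometimes throttled) directory can flush, exercising the writer's stall control: a single indexing thread must never block, while with several threads a stall is expected once peak usage exceeds roughly twice the configured buffer size.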
Example #3
        public virtual void TestFlushDocCount()
        {
            // LUCENENET specific - disable the test if asserts are not enabled
            AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled);

            int[] numThreads = new int[] { 2 + AtLeast(1), 1 };
            for (int i = 0; i < numThreads.Length; i++)
            {
                int                    numDocumentsToIndex = 50 + AtLeast(30);
                AtomicInt32            numDocs             = new AtomicInt32(numDocumentsToIndex);
                Directory              dir         = NewDirectory();
                MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
                IndexWriterConfig      iwc         = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetFlushPolicy(flushPolicy);

                int numDWPT = 1 + AtLeast(2);
                DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(numDWPT);
                iwc.SetIndexerThreadPool(threadPool);
                iwc.SetMaxBufferedDocs(2 + AtLeast(10));
                iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                IndexWriter writer = new IndexWriter(dir, iwc);
                flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
                Assert.IsTrue(flushPolicy.FlushOnDocCount);
                Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
                Assert.IsFalse(flushPolicy.FlushOnRAM);
                DocumentsWriter docsWriter = writer.DocsWriter;
                Assert.IsNotNull(docsWriter);
                DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
                Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

                IndexThread[] threads = new IndexThread[numThreads[i]];
                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x] = new IndexThread(numDocs, writer, lineDocFile, false);
                    threads[x].Start();
                }

                for (int x = 0; x < threads.Length; x++)
                {
                    threads[x].Join();
                }

                Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads[i]);
                Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
                Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
                Assert.IsTrue(flushPolicy.peakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak doc count without flush exceeded watermark");
                AssertActiveBytesAfter(flushControl);
                writer.Dispose();
                Assert.AreEqual(0, flushControl.ActiveBytes);
                dir.Dispose();
            }
        }
Example #4
        public virtual void TestInvalidValues()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Test IndexDeletionPolicy
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
            conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
            Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType());
            try
            {
                conf.SetIndexDeletionPolicy(null);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // ok
            }

            // Test MergeScheduler
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
            conf.SetMergeScheduler(new SerialMergeScheduler());
            Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
            try
            {
                conf.SetMergeScheduler(null);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // ok
            }

            // Test Similarity:
            // we shouldn't assert what the default is, just that it's not null.
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            conf.SetSimilarity(new MySimilarity());
            Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
            try
            {
                conf.SetSimilarity(null);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // ok
            }

            // Test IndexingChain
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            conf.SetIndexingChain(new MyIndexingChain());
            Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
            try
            {
                conf.SetIndexingChain(null);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // ok
            }

            try
            {
                conf.SetMaxBufferedDeleteTerms(0);
                Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            try
            {
                conf.SetMaxBufferedDocs(1);
                Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            try
            {
                // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
                conf.SetMaxBufferedDocs(4);
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
            try
            {
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            // Test setReaderTermsIndexDivisor
            try
            {
                conf.SetReaderTermsIndexDivisor(0);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            // Setting to -1 is ok
            conf.SetReaderTermsIndexDivisor(-1);
            try
            {
                conf.SetReaderTermsIndexDivisor(-2);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(2048);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(0);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
            }
            catch (System.ArgumentException)
            {
                // this is expected
            }

            // Test MergePolicy
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            conf.SetMergePolicy(new LogDocMergePolicy());
            Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
            try
            {
                conf.SetMergePolicy(null);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // ok
            }
        }
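As the try/catch blocks above show, maxBufferedDocs and ramBufferSizeMB cannot both be set to IndexWriterConfig.DISABLE_AUTO_FLUSH: disabling the second trigger while the first is already disabled throws an ArgumentException, so at least one automatic flush criterion always stays active.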
Example #5
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int           numDocs         = AtLeast(20000);
            int           numBinaryFields = AtLeast(5);
            int           numTerms        = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
            ISet<string>  updateTerms     = new JCG.HashSet<string>();

            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            //    System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc            = new Document();
                int      numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);

            //    System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int  field      = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
                long value      = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader     r  = context.AtomicReader;
                    BinaryDocValues  f  = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
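Note that this example changes the RAM buffer size through writer.Config after the IndexWriter has already been created; the buffer size is a live setting, which the test uses here to force a flush roughly every 2 KB of buffered updates.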
Example #6
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory         dir = new MockDirectoryWrapper(new J2N.Randomizer(Random.NextInt64()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);

            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);

            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.bufferedUpdatesStream.Any());

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.GetReader();

            Assert.IsFalse(writer.bufferedUpdatesStream.Any());
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp         = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.doMerge = true;
            fsmp.start   = 0;
            fsmp.length  = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.GetReader();

            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs is null);
            r2.Dispose();

            /*
             * /// // added docs are in the ram buffer
             * /// for (int x = 15; x < 20; x++) {
             * ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
             * ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
             * /// }
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * /// // delete from the ram buffer
             * /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
             * ///
             * /// Term id3 = new Term("id", Integer.toString(3));
             * ///
             * /// // delete from the 1st segment
             * /// writer.DeleteDocuments(id3);
             * ///
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// //System.out
             * /// //    .println("segdels1:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// // we cause a merge to happen
             * /// fsmp.doMerge = true;
             * /// fsmp.start = 0;
             * /// fsmp.length = 2;
             * /// System.out.println("maybeMerge "+writer.SegmentInfos);
             * ///
             * /// SegmentInfo info0 = writer.SegmentInfos[0];
             * /// SegmentInfo info1 = writer.SegmentInfos[1];
             * ///
             * /// writer.MaybeMerge();
             * /// System.out.println("maybeMerge after "+writer.SegmentInfos);
             * /// // there should be docs in RAM
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// // assert we've merged the 1 and 2 segments
             * /// // and still have a segment leftover == 2
             * /// Assert.AreEqual(2, writer.SegmentInfos.Size());
             * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
             * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
             * ///
             * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// IndexReader r = writer.GetReader();
             * /// IndexReader r1 = r.getSequentialSubReaders()[0];
             * /// printDelDocs(r1.GetLiveDocs());
             * /// int[] docs = toDocsArray(id3, null, r);
             * /// System.out.println("id3 docs:"+Arrays.toString(docs));
             * /// // there shouldn't be any docs for id:3
             * /// Assert.IsTrue(docs is null);
             * /// r.Dispose();
             * ///
             * /// part2(writer, fsmp);
             * ///
             */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
Example #7
        protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
        {
            int                    numDocumentsToIndex = 10 + AtLeast(30);
            AtomicInt32            numDocs             = new AtomicInt32(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            MockAnalyzer           analyzer    = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
            int numDWPT           = 1 + AtLeast(2);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);
            iwc.SetRAMBufferSizeMB(maxRamMB);
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            Assert.IsFalse(flushPolicy.FlushOnDocCount);
            Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
            Assert.IsTrue(flushPolicy.FlushOnRAM);
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.flushControl;

            Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);

            Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads);
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
            AssertActiveBytesAfter(flushControl);
            if (flushPolicy.HasMarkedPending)
            {
                Assert.IsTrue(maxRAMBytes < flushControl.peakActiveBytes);
            }
            if (ensureNotStalled)
            {
                Assert.IsFalse(docsWriter.flushControl.stallControl.WasStalled);
            }
            writer.Dispose();
            Assert.AreEqual(0, flushControl.ActiveBytes);
            dir.Dispose();
        }
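With the document-count and delete-term triggers both disabled, this helper verifies that the default flush policy flushes on RAM alone (FlushOnRAM) and that the peak buffered bytes never exceed the watermark derived from the maxRamMB value passed to SetRAMBufferSizeMB.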
Example #8
        public virtual void DoTestLongPostingsNoPositions(IndexOptions options)
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (ie same seed will point to same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

            int NUM_DOCS = AtLeast(2000);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);

                /*
                 * for(int idx=0;idx<s1.Length();idx++) {
                 * System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                 * }
                 * for(int idx=0;idx<s2.Length();idx++) {
                 * System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                 * }
                 */
            }

            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random.NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader r;

            if (true)
            {
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
                iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
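                // -1 is IndexWriterConfig.DISABLE_AUTO_FLUSH, so only the RAM buffer size above triggers flushes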
                iwc.SetMaxBufferedDocs(-1);
                RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

                FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
                ft.IndexOptions = options;
                for (int idx = 0; idx < NUM_DOCS; idx++)
                {
                    Document doc   = new Document();
                    string   s     = isS1.Get(idx) ? s1 : s2;
                    Field    f     = NewField("field", s, ft);
                    int      count = TestUtil.NextInt32(Random, 1, 4);
                    for (int ct = 0; ct < count; ct++)
                    {
                        doc.Add(f);
                    }
                    riw.AddDocument(doc);
                }

                r = riw.GetReader();
                riw.Dispose();
            }
            else
            {
#pragma warning disable 162
                r = DirectoryReader.Open(dir);
#pragma warning restore 162
            }

            /*
             * if (VERBOSE) {
             * System.out.println("TEST: terms");
             * TermEnum termEnum = r.Terms();
             * while(termEnum.Next()) {
             *  System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
             *  Assert.IsTrue(termEnum.DocFreq() > 0);
             *  System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
             *  System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
             *  final String s = termEnum.Term().Text();
             *  for(int idx=0;idx<s.Length();idx++) {
             *    System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
             *  }
             * }
             * }
             */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string term;
                bool   doS1;
                if (Random.NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
                }

                DocsEnum docs;
                DocsEnum postings;

                if (options == IndexOptions.DOCS_ONLY)
                {
                    docs     = TestUtil.Docs(Random, r, "field", new BytesRef(term), null, null, DocsFlags.NONE);
                    postings = null;
                }
                else
                {
                    docs = postings = TestUtil.Docs(Random, r, "field", new BytesRef(term), null, null, DocsFlags.FREQS);
                    Debug.Assert(postings != null);
                }
                Debug.Assert(docs != null);

                int docID = -1;
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    int what = Random.Next(3);
                    if (what == 0)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = docs.NextDoc();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3 && postings != null)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random.Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = docs.Advance(targetDocID);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3 && postings != null)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Example #9
        public virtual void TestInvalidValues()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Test IndexDeletionPolicy
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType());
            conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
            Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.IndexDeletionPolicy.GetType());
            try
            {
                conf.SetIndexDeletionPolicy(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            // Test MergeScheduler
#if !FEATURE_CONCURRENTMERGESCHEDULER
            Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType());
#else
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
#endif
            conf.SetMergeScheduler(new SerialMergeScheduler());
            Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
            try
            {
                conf.SetMergeScheduler(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            // Test Similarity:
            // we shouldn't assert what the default is, just that it's not null.
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            conf.SetSimilarity(new MySimilarity());
            Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
            try
            {
                conf.SetSimilarity(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            // Test IndexingChain
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            conf.SetIndexingChain(new MyIndexingChain());
            Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
            try
            {
                conf.SetIndexingChain(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            try
            {
                conf.SetMaxBufferedDeleteTerms(0);
                Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                conf.SetMaxBufferedDocs(1);
                Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
                conf.SetMaxBufferedDocs(4);
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
            try
            {
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            // Test setReaderTermsIndexDivisor
            try
            {
                conf.SetReaderTermsIndexDivisor(0);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            // Setting to -1 is ok
            conf.SetReaderTermsIndexDivisor(-1);
            try
            {
                conf.SetReaderTermsIndexDivisor(-2);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(2048);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(0);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            // Test MergePolicy
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            conf.SetMergePolicy(new LogDocMergePolicy());
            Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
            try
            {
                conf.SetMergePolicy(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }
        }
Example #10
        public virtual void TestLongPostings_Mem()
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (ie same seed will point to same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

            int NUM_DOCS = AtLeast(2000);

            if (Verbose)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (Verbose)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);

                /*
                 * for(int idx=0;idx<s1.length();idx++) {
                 * System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                 * }
                 * for(int idx=0;idx<s2.length();idx++) {
                 * System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                 * }
                 */
            }

            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random.NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader       r;
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());

            iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
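            // -1 is IndexWriterConfig.DISABLE_AUTO_FLUSH, so only the RAM buffer size above triggers flushes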
            iwc.SetMaxBufferedDocs(-1);
            RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                Document doc   = new Document();
                string   s     = isS1.Get(idx) ? s1 : s2;
                Field    f     = NewTextField("field", s, Field.Store.NO);
                int      count = TestUtil.NextInt32(Random, 1, 4);
                for (int ct = 0; ct < count; ct++)
                {
                    doc.Add(f);
                }
                riw.AddDocument(doc);
            }

            r = riw.GetReader();
            riw.Dispose();

            /*
             * if (VERBOSE) {
             * System.out.println("TEST: terms");
             * TermEnum termEnum = r.Terms();
             * while(termEnum.Next()) {
             *  System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().length());
             *  Assert.IsTrue(termEnum.DocFreq() > 0);
             *  System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.length());
             *  System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.length());
             *  final String s = termEnum.Term().Text();
             *  for(int idx=0;idx<s.length();idx++) {
             *    System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
             *  }
             * }
             * }
             */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);

            for (int iter = 0; iter < num; iter++)
            {
                string term;
                bool   doS1;
                if (Random.NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
                }

                DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));

                int docID = -1;
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    int what = Random.Next(3);
                    if (what == 0)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = postings.NextDoc();
                        if (Verbose)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random.NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random.NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random.Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                        }
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = postings.Advance(targetDocID);
                        if (Verbose)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random.NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random.NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Example #11
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);
            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.BufferedUpdatesStreamAny);

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.Reader;
            Assert.IsFalse(writer.BufferedUpdatesStreamAny);
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.DoMerge = true;
            fsmp.Start = 0;
            fsmp.Length = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.Reader;
            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs == null);
            r2.Dispose();

            /*
            /// // added docs are in the ram buffer
            /// for (int x = 15; x < 20; x++) {
            ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
            ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            /// }
            /// Assert.IsTrue(writer.numRamDocs() > 0);
            /// // delete from the ram buffer
            /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
            ///
            /// Term id3 = new Term("id", Integer.toString(3));
            ///
            /// // delete from the 1st segment
            /// writer.DeleteDocuments(id3);
            ///
            /// Assert.IsTrue(writer.numRamDocs() > 0);
            ///
            /// //System.out
            /// //    .println("segdels1:" + writer.docWriter.deletesToString());
            ///
            /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
            ///
            /// // we cause a merge to happen
            /// fsmp.doMerge = true;
            /// fsmp.start = 0;
            /// fsmp.Length = 2;
            /// System.out.println("maybeMerge "+writer.SegmentInfos);
            ///
            /// SegmentInfo info0 = writer.SegmentInfos.Info(0);
            /// SegmentInfo info1 = writer.SegmentInfos.Info(1);
            ///
            /// writer.MaybeMerge();
            /// System.out.println("maybeMerge after "+writer.SegmentInfos);
            /// // there should be docs in RAM
            /// Assert.IsTrue(writer.numRamDocs() > 0);
            ///
            /// // assert we've merged the 1 and 2 segments
            /// // and still have a segment leftover == 2
            /// Assert.AreEqual(2, writer.SegmentInfos.Size());
            /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
            /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
            ///
            /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
            ///
            /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
            ///
            /// IndexReader r = writer.GetReader();
            /// IndexReader r1 = r.getSequentialSubReaders()[0];
            /// printDelDocs(r1.GetLiveDocs());
            /// int[] docs = toDocsArray(id3, null, r);
            /// System.out.println("id3 docs:"+Arrays.toString(docs));
            /// // there shouldn't be any docs for id:3
            /// Assert.IsTrue(docs == null);
            /// r.Dispose();
            ///
            /// part2(writer, fsmp);
            ///
            */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }