SetMergePolicy() public method

Expert: MergePolicy is invoked whenever there are changes to the segments in the index. Its role is to select which merges to do, if any, and return a MergePolicy.MergeSpecification describing the merges. It also selects merges to do for forceMerge.

This setting only takes effect when the IndexWriter is first created.

public IndexWriterConfig SetMergePolicy(MergePolicy mergePolicy)

mergePolicy: MergePolicy
returns: IndexWriterConfig (this instance, so calls can be chained)
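
A minimal usage sketch before the test-suite examples below (`dir` and `analyzer` are placeholders assumed to already exist, not taken from the examples): because the merge policy is read when the IndexWriter is constructed, it must be set on the config first; since the method returns the config, the call can be chained.

        // Hypothetical setup: `dir` (Directory) and `analyzer` (Analyzer) exist already.
        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetMergePolicy(new LogDocMergePolicy()); // returns the config, so calls chain
        IndexWriter writer = new IndexWriter(dir, conf);
        // Replacing the policy on `conf` at this point would not affect `writer`:
        // the setting only takes effect when the writer is first created.
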
Example #1
        public virtual void TestSingleNonMergeableSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3, true);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the index still contains a single segment.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(1, sis.Count);
        }
Example #2
        public virtual void TestNumDocsLimit()
        {
            // tests that the max merge docs constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(3, sis.Count);
        }
Example #3
        public virtual void TestLastSegmentTooLarge()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 5);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(2, sis.Count);
        }
Example #4
        public virtual void TestAddNumericTwice()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document    doc     = new Document();

            doc.Add(new NumericDocValuesField("dv", 1));
            doc.Add(new NumericDocValuesField("dv", 2));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
Example #5
        public virtual void TestTooLargeTermSortedSetBytes()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document    doc     = new Document();

            byte[]   bytes = new byte[100000];
            BytesRef b     = new BytesRef(bytes);

            Random().NextBytes(bytes);
            doc.Add(new SortedSetDocValuesField("dv", b));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("did not get expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }
            iwriter.Dispose();
            directory.Dispose();
        }
Example #6
        public virtual void TestTooLargeSortedBytes()
        {
            Analyzer analyzer = new MockAnalyzer(Random);

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document    doc     = new Document();
            var         bytes   = new byte[100000];
            BytesRef    b       = new BytesRef(bytes);

            Random.NextBytes(bytes);
            doc.Add(new SortedDocValuesField("dv", b));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("did not get expected exception");
            }
            catch (Exception expected) when(expected.IsIllegalArgumentException())
            {
                // expected
            }
            iwriter.Dispose();
            directory.Dispose();
        }
Example #7
        public virtual void TestDocValuesUnstored()
        {
            Directory         dir      = NewDirectory();
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            IndexWriter writer = new IndexWriter(dir, iwconfig);

            for (int i = 0; i < 50; i++)
            {
                Document doc = new Document();
                doc.Add(new NumericDocValuesField("dv", i));
                doc.Add(new TextField("docId", "" + i, Field.Store.YES));
                writer.AddDocument(doc);
            }
            DirectoryReader r      = writer.Reader;
            AtomicReader    slow   = SlowCompositeReaderWrapper.Wrap(r);
            FieldInfos      fi     = slow.FieldInfos;
            FieldInfo       dvInfo = fi.FieldInfo("dv");

            Assert.IsTrue(dvInfo.HasDocValues());
            NumericDocValues dv = slow.GetNumericDocValues("dv");

            for (int i = 0; i < 50; i++)
            {
                Assert.AreEqual(i, dv.Get(i));
                Document d = slow.Document(i);
                // cannot use d.Get("dv") due to another bug!
                Assert.IsNull(d.GetField("dv"));
                Assert.AreEqual(Convert.ToString(i), d.Get("docId"));
            }
            slow.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Example #8
        public virtual void TestAddBinaryTwice()
        {
            Analyzer analyzer = new MockAnalyzer(Random);

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document    doc     = new Document();

            doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
Example #9
        public virtual void TestIndexWriterDirtSimple()
        {
            Directory         dir = new RAMDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            TieredMergePolicy tmp = NewTieredMergePolicy();

            iwc.SetMergePolicy(tmp);
            iwc.SetMaxBufferedDocs(2);
            tmp.MaxMergeAtOnce              = 100;
            tmp.SegmentsPerTier             = 100;
            tmp.ForceMergeDeletesPctAllowed = 30.0;
            IndexWriter w = new IndexWriter(dir, iwc);

            int numDocs = 2;

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            Assert.AreEqual(numDocs, w.MaxDoc);
            Assert.AreEqual(numDocs, w.NumDocs);
            w.Dispose();
            dir.Dispose();
        }
Example #10
        public virtual void TestSingleMergeableTooLargeSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 5, true);

            // delete the last document

            writer.DeleteDocuments(new Term("id", "4"));
            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the segment was not merged: it is still the only segment and still has deletions.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(1, sis.Count);
            Assert.IsTrue(sis.Info(0).HasDeletions);
        }
Example #11
        public void TestEmptyChildFilter()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            // we don't want to merge, since we rely on a certain segment setup
            config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
            IndexWriter w = new IndexWriter(dir, config);

            IList<Document> docs = new List<Document>();

            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);
            w.Commit();
            int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
            for (int i = 0; i < num; i++)
            {
                docs.Clear();
                docs.Add(MakeJob("java", 2007));
                w.AddDocuments(docs);
            }

            IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
            w.Dispose();
            assertTrue(r.Leaves.Count > 1);
            IndexSearcher s = new IndexSearcher(r);
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
            s.Search(fullQuery, c);
            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            assertFalse(float.IsNaN(results.MaxScore));
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);
            IGroupDocs<int> group = results.Groups[0];
            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            r.Dispose();
            dir.Dispose();
        }
Example #12
        public virtual void TestMergeFactor()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            lmp.MergeFactor  = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 4 segments in the index, because of the merge factor and
            // max merge docs settings.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(4, sis.Count);
        }
Example #13
        public override void SetUp()
        {
            base.SetUp();

            // for now it's SimpleText vs Lucene46 (random postings format)
            // as this gives the best overall coverage. when we have more
            // codecs we should probably pick 2 from Codec.availableCodecs()

            LeftCodec  = Codec.ForName("SimpleText");
            RightCodec = new RandomCodec(Random());

            LeftDir  = NewDirectory();
            RightDir = NewDirectory();

            long seed = Random().Next();

            // must use same seed because of random payloads, etc
            int          maxTermLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            MockAnalyzer leftAnalyzer  = new MockAnalyzer(new Random((int)seed));

            leftAnalyzer.MaxTokenLength = maxTermLength;
            MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random((int)seed));

            rightAnalyzer.MaxTokenLength = maxTermLength;

            // but these can be different
            // TODO: this turns this into a really big test of Multi*, is that what we want?
            IndexWriterConfig leftConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);

            leftConfig.SetCodec(LeftCodec);
            // preserve docids
            leftConfig.SetMergePolicy(NewLogMergePolicy());

            IndexWriterConfig rightConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer);

            rightConfig.SetCodec(RightCodec);
            // preserve docids
            rightConfig.SetMergePolicy(NewLogMergePolicy());

            // must use same seed because of random docvalues fields, etc
            RandomIndexWriter leftWriter  = new RandomIndexWriter(new Random((int)seed), LeftDir, leftConfig);
            RandomIndexWriter rightWriter = new RandomIndexWriter(new Random((int)seed), RightDir, rightConfig);

            int numdocs = AtLeast(100);

            CreateRandomIndex(numdocs, leftWriter, seed);
            CreateRandomIndex(numdocs, rightWriter, seed);

            LeftReader = MaybeWrapReader(leftWriter.Reader);
            leftWriter.Dispose();
            RightReader = MaybeWrapReader(rightWriter.Reader);
            rightWriter.Dispose();

            // check that our readers are valid
            TestUtil.CheckReader(LeftReader);
            TestUtil.CheckReader(RightReader);

            Info = "left: " + LeftCodec.ToString() + " / right: " + RightCodec.ToString();
        }
Example #14
        public virtual void TestIntersectEmptyString()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(NewStringField("field", "", Field.Store.NO));
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            // add empty string to both documents, so that singletonDocID == -1.
            // For an FST-based term dict, we expect the first arc to be
            // flagged with HAS_FINAL_OUTPUT
            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            doc.Add(NewStringField("field", "", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.GetReader();

            w.Dispose();
            AtomicReader sub   = GetOnlySegmentReader(r);
            Terms        terms = sub.Fields.GetTerms("field");

            Automaton         automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton(); // accept ALL
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);

            TermsEnum te = terms.Intersect(ca, null);
            DocsEnum  de;

            Assert.AreEqual("", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.AreEqual("abc", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.IsNull(te.Next());

            // pass empty string
            te = terms.Intersect(ca, new BytesRef(""));

            Assert.AreEqual("abc", te.Next().Utf8ToString());
            de = te.Docs(null, null, DocsFlags.NONE);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(1, de.NextDoc());

            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Example #15
        public virtual void TestSorted()
        {
            Directory dir   = NewDirectory();
            Document  doc   = new Document();
            BytesRef  @ref  = new BytesRef();
            Field     field = new SortedDocValuesField("bytes", @ref);

            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
                if (DefaultCodecSupportsDocsWithField() && Random().Next(7) == 0)
                {
                    iw.AddDocument(new Document());
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.Reader;
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            SortedDocValues multi  = MultiDocValues.GetSortedValues(ir, "bytes");
            SortedDocValues single = merged.GetSortedDocValues("bytes");

            Assert.AreEqual(single.ValueCount, multi.ValueCount);
            BytesRef actual   = new BytesRef();
            BytesRef expected = new BytesRef();

            for (int i = 0; i < numDocs; i++)
            {
                // check ord
                Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i));
                // check value
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Example #16
        private IndexWriterConfig NewWriterConfig()
        {
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);

            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            // prevent any merges by default.
            conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            return(conf);
        }
Example #17
        public virtual void TestIntersectBasic()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(new LogDocMergePolicy());
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(NewTextField("field", "aaa", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bbb", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("field", "ccc", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.GetReader();

            w.Dispose();
            AtomicReader      sub       = GetOnlySegmentReader(r);
            Terms             terms     = sub.Fields.GetTerms("field");
            Automaton         automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton();
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);
            TermsEnum         te        = terms.Intersect(ca, null);

            Assert.AreEqual("aaa", te.Next().Utf8ToString());
            Assert.AreEqual(0, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("bbb", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("ccc", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            te = terms.Intersect(ca, new BytesRef("abc"));
            Assert.AreEqual("bbb", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("ccc", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            te = terms.Intersect(ca, new BytesRef("aaa"));
            Assert.AreEqual("bbb", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("ccc", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Example #18
        public virtual void TestForceMergeDeletesMaxSegSize()
        {
            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            TieredMergePolicy tmp  = new TieredMergePolicy();

            tmp.MaxMergedSegmentMB          = 0.01;
            tmp.ForceMergeDeletesPctAllowed = 0.0;
            conf.SetMergePolicy(tmp);

            RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf);

            w.DoRandomForceMerge = false;

            int numDocs = AtLeast(200);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", "" + i, Field.Store.NO));
                doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            w.ForceMerge(1);
            IndexReader r = w.GetReader();

            Assert.AreEqual(numDocs, r.MaxDoc);
            Assert.AreEqual(numDocs, r.NumDocs);
            r.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: delete doc");
            }

            w.DeleteDocuments(new Term("id", "" + (42 + 17)));

            r = w.GetReader();
            Assert.AreEqual(numDocs, r.MaxDoc);
            Assert.AreEqual(numDocs - 1, r.NumDocs);
            r.Dispose();

            w.ForceMergeDeletes();

            r = w.GetReader();
            Assert.AreEqual(numDocs - 1, r.MaxDoc);
            Assert.AreEqual(numDocs - 1, r.NumDocs);
            r.Dispose();

            w.Dispose();

            dir.Dispose();
        }
Example #19
        public virtual void TestMixupDocs()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer     = new RandomIndexWriter(Random, dir, iwc);
            Document          doc        = new Document();
            FieldType         customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads  = true;
            customType.StoreTermVectorOffsets   = Random.NextBoolean();
            Field       field = new Field("field", "", customType);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            writer.AddDocument(doc);

            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);

            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);

            DirectoryReader reader = writer.GetReader();
            Terms           terms  = reader.GetTermVector(1, "field");

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(terms != null);
            }
            TermsEnum termsEnum = terms.GetIterator(null);

            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);

            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(0, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example #20
        private static IndexWriter GetWriter(Directory directory)
        {
            MergePolicy       policy = new LogByteSizeMergePolicy();
            IndexWriterConfig conf   = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            conf.SetMergePolicy(policy);
            conf.SetOpenMode(OpenMode.CREATE_OR_APPEND);

            IndexWriter writer = new IndexWriter(directory, conf);

            return(writer);
        }
Example #21
        public virtual void TestSepPositionAfterMerge()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            config.SetMergePolicy(NewLogMergePolicy());
            config.SetCodec(TestUtil.AlwaysPostingsFormat(new MockSepPostingsFormat()));
            IndexWriter writer = new IndexWriter(dir, config);

            try
            {
                PhraseQuery pq = new PhraseQuery();
                pq.Add(new Term("content", "bbb"));
                pq.Add(new Term("content", "ccc"));

                Document  doc        = new Document();
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.OmitNorms = true;
                doc.Add(NewField("content", "aaa bbb ccc ddd", customType));

                // add document and force commit for creating a first segment
                writer.AddDocument(doc);
                writer.Commit();

                ScoreDoc[] results = this.Search(writer, pq, 5);
                Assert.AreEqual(1, results.Length);
                Assert.AreEqual(0, results[0].Doc);

                // add document and force commit for creating a second segment
                writer.AddDocument(doc);
                writer.Commit();

                // at this point, there should be at least two segments
                results = this.Search(writer, pq, 5);
                Assert.AreEqual(2, results.Length);
                Assert.AreEqual(0, results[0].Doc);

                // optimise to merge the segments
                writer.ForceMerge(1);

                results = this.Search(writer, pq, 5);
                Assert.AreEqual(2, results.Length);
                Assert.AreEqual(0, results[0].Doc);
            }
            finally
            {
                writer.Dispose();
                dir.Dispose();
            }
        }
Example #22
        public virtual void TestBinary()
        {
            Directory dir   = NewDirectory();
            Document  doc   = new Document();
            BytesRef  @ref  = new BytesRef();
            Field     field = new BinaryDocValuesField("bytes", @ref);

            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                @ref.CopyChars(TestUtil.RandomUnicodeString(Random));
                iw.AddDocument(doc);
                if (Random.Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.GetReader();

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.GetReader();
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            BinaryDocValues multi    = MultiDocValues.GetBinaryValues(ir, "bytes");
            BinaryDocValues single   = merged.GetBinaryDocValues("bytes");
            BytesRef        actual   = new BytesRef();
            BytesRef        expected = new BytesRef();

            for (int i = 0; i < numDocs; i++)
            {
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Example #23
        public virtual void TestPartialMerge()
        {
            int num = AtLeast(10);

            for (int iter = 0; iter < num; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                Directory         dir  = NewDirectory();
                IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
                conf.SetMergeScheduler(new SerialMergeScheduler());
                TieredMergePolicy tmp = NewTieredMergePolicy();
                conf.SetMergePolicy(tmp);
                conf.SetMaxBufferedDocs(2);
                tmp.MaxMergeAtOnce  = 3;
                tmp.SegmentsPerTier = 6;

                IndexWriter w        = new IndexWriter(dir, conf);
                int         maxCount = 0;
                int         numDocs  = TestUtil.NextInt32(Random, 20, 100);
                for (int i = 0; i < numDocs; i++)
                {
                    Document doc = new Document();
                    doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
                    w.AddDocument(doc);
                    int count = w.SegmentCount;
                    maxCount = Math.Max(count, maxCount);
                    Assert.IsTrue(count >= maxCount - 3, "count=" + count + " maxCount=" + maxCount);
                }

                w.Flush(true, true);

                int segmentCount = w.SegmentCount;
                int targetCount  = TestUtil.NextInt32(Random, 1, segmentCount);
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: merge to " + targetCount + " segs (current count=" + segmentCount + ")");
                }
                w.ForceMerge(targetCount);
                Assert.AreEqual(targetCount, w.SegmentCount);

                w.Dispose();
                dir.Dispose();
            }
        }
Example #24
        public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
        {
            // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
            DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/4x/", dirName));

            TestUtil.Rm(indexDir);
            Directory dir             = NewFSDirectory(indexDir);
            LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();

            mp.NoCFSRatio          = doCFS ? 1.0 : 0.0;
            mp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            // TODO: remove randomness
            IndexWriterConfig conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS);
            IndexWriter       writer = new IndexWriter(dir, conf);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");
            if (fullyMerged)
            {
                writer.ForceMerge(1);
            }
            writer.Dispose();

            if (!fullyMerged)
            {
                // open fresh writer so we get no prx file in the added segment
                mp            = new LogByteSizeMergePolicy();
                mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
                // TODO: remove randomness
                conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS);
                writer = new IndexWriter(dir, conf);
                AddNoProxDoc(writer);
                writer.Dispose();

                writer = new IndexWriter(dir, conf.SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES));
                Term searchTerm = new Term("id", "7");
                writer.DeleteDocuments(searchTerm);
                writer.Dispose();
            }

            dir.Dispose();

            return(indexDir);
        }
Example #25
        public virtual void TestNumerics()
        {
            Directory dir   = NewDirectory();
            Document  doc   = new Document();
            Field     field = new NumericDocValuesField("numbers", 0);

            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                field.SetInt64Value(Random().NextLong());
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.Reader;
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            NumericDocValues multi  = MultiDocValues.GetNumericValues(ir, "numbers");
            NumericDocValues single = merged.GetNumericDocValues("numbers");

            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Example #26
        public virtual void TestForceMergeDeletes()
        {
            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            TieredMergePolicy tmp  = NewTieredMergePolicy();

            conf.SetMergePolicy(tmp);
            conf.SetMaxBufferedDocs(4);
            tmp.MaxMergeAtOnce              = 100;
            tmp.SegmentsPerTier             = 100;
            tmp.ForceMergeDeletesPctAllowed = 30.0;
            IndexWriter w = new IndexWriter(dir, conf);

            for (int i = 0; i < 80; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
                w.AddDocument(doc);
            }
            Assert.AreEqual(80, w.MaxDoc);
            Assert.AreEqual(80, w.NumDocs);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: delete docs");
            }
            w.DeleteDocuments(new Term("content", "0"));
            w.ForceMergeDeletes();

            Assert.AreEqual(80, w.MaxDoc);
            Assert.AreEqual(60, w.NumDocs);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: forceMergeDeletes2");
            }
            ((TieredMergePolicy)w.Config.MergePolicy).ForceMergeDeletesPctAllowed = 10.0;
            w.ForceMergeDeletes();
            Assert.AreEqual(60, w.NumDocs);
            Assert.AreEqual(60, w.MaxDoc);
            w.Dispose();
            dir.Dispose();
        }
Example #27
        public virtual void TestMaxMergeCount()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            int            maxMergeCount       = TestUtil.NextInt(Random(), 1, 5);
            int            maxMergeThreads     = TestUtil.NextInt(Random(), 1, maxMergeCount);
            CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount);
            AtomicInteger  runningMergeCount   = new AtomicInteger(0);
            AtomicBoolean  failed = new AtomicBoolean();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
            }

            ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed);

            cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
            iwc.SetMergeScheduler(cms);
            iwc.SetMaxBufferedDocs(2);

            TieredMergePolicy tmp = new TieredMergePolicy();

            iwc.SetMergePolicy(tmp);
            tmp.MaxMergeAtOnce  = 2;
            tmp.SegmentsPerTier = 2;

            IndexWriter w   = new IndexWriter(dir, iwc);
            Document    doc = new Document();

            doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED));
            while (enoughMergesWaiting.CurrentCount != 0 && !failed.Get())
            {
                for (int i = 0; i < 10; i++)
                {
                    w.AddDocument(doc);
                }
            }
            w.Dispose(false);
            dir.Dispose();
        }
Example #28
        public virtual void TestLiveChangeToCFS()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy(true));
            // Start false:
            iwc.SetUseCompoundFile(false);
            iwc.MergePolicy.NoCFSRatio = 0.0d;
            IndexWriter w = new IndexWriter(dir, iwc);

            // Change to true:
            w.Config.SetUseCompoundFile(true);

            Document doc = new Document();

            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit");

            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            w.ForceMerge(1);
            w.Commit();

            // no compound files after merge
            Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge");

            MergePolicy lmp = w.Config.MergePolicy;

            lmp.NoCFSRatio          = 1.0;
            lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;

            w.AddDocument(doc);
            w.ForceMerge(1);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge");
            w.Dispose();
            dir.Dispose();
        }
Example #29
        /// <summary>
        /// Perform the upgrade. </summary>
        public void Upgrade()
        {
            if (!DirectoryReader.IndexExists(Dir))
            {
                throw new IndexNotFoundException(Dir.ToString());
            }

            if (!DeletePriorCommits)
            {
                ICollection<IndexCommit> commits = DirectoryReader.ListCommits(Dir);
                if (commits.Count > 1)
                {
                    throw new System.ArgumentException("this tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
                }
            }

            IndexWriterConfig c = (IndexWriterConfig)Iwc.Clone();

            c.SetMergePolicy(new UpgradeIndexMergePolicy(c.MergePolicy));
            c.SetIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

            IndexWriter w = new IndexWriter(Dir, c);

            try
            {
                InfoStream infoStream = c.InfoStream;
                if (infoStream.IsEnabled("IndexUpgrader"))
                {
                    infoStream.Message("IndexUpgrader", "Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + Dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
                }
                w.ForceMerge(1);
                if (infoStream.IsEnabled("IndexUpgrader"))
                {
                    infoStream.Message("IndexUpgrader", "All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
                }
            }
            finally
            {
                w.Dispose();
            }
        }
Example #30
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            IndexWriterConfig config   = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            config.SetMergePolicy(NewLogMergePolicy());
            config.SetSimilarity(new TestSimilarity());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            Document          doc    = new Document();
            Field             foo    = NewTextField("foo", "", Field.Store.NO);

            doc.Add(foo);
            for (int i = 0; i < 100; i++)
            {
                foo.StringValue = AddValue();
                writer.AddDocument(doc);
            }
            reader = writer.Reader;
            writer.Dispose();
        }
Example #31
        public virtual void TestMixupDocs()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            Document          doc    = new Document();
            Field             field  = new TextField("field", "", Field.Store.NO);
            TokenStream       ts     = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            writer.AddDocument(doc);
            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);
            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.GetReader();
            AtomicReader         sr     = SlowCompositeReaderWrapper.Wrap(reader);
            DocsAndPositionsEnum de     = sr.GetTermPositionsEnum(new Term("field", "withPayload"));

            de.NextDoc();
            de.NextPosition();
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example #32
        public virtual void TestInvalidValues()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Test IndexDeletionPolicy
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
            conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
            Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType());
            try
            {
                conf.SetIndexDeletionPolicy(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            // Test MergeScheduler
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
            conf.SetMergeScheduler(new SerialMergeScheduler());
            Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
            try
            {
                conf.SetMergeScheduler(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            // Test Similarity:
            // we shouldn't assert what the default is, just that it's not null.
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            conf.SetSimilarity(new MySimilarity());
            Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
            try
            {
                conf.SetSimilarity(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            // Test IndexingChain
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            conf.SetIndexingChain(new MyIndexingChain());
            Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
            try
            {
                conf.SetIndexingChain(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            try
            {
                conf.SetMaxBufferedDeleteTerms(0);
                Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                conf.SetMaxBufferedDocs(1);
                Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
                conf.SetMaxBufferedDocs(4);
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
            try
            {
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            // Test setReaderTermsIndexDivisor
            try
            {
                conf.SetReaderTermsIndexDivisor(0);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            // Setting to -1 is ok
            conf.SetReaderTermsIndexDivisor(-1);
            try
            {
                conf.SetReaderTermsIndexDivisor(-2);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(2048);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(0);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            // Test MergePolicy
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            conf.SetMergePolicy(new LogDocMergePolicy());
            Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
            try
            {
                conf.SetMergePolicy(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }
        }
Example #33
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);
            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.BufferedUpdatesStreamAny);

            // getting a reader flushes pending deletes,
            // so there should not be any more buffered
            IndexReader r1 = writer.Reader;
            Assert.IsFalse(writer.BufferedUpdatesStreamAny);
            r1.Dispose();

            // delete id:2 from the first segment, then merge
            // segments 0 and 1, which should apply that delete
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.DoMerge = true;
            fsmp.Start = 0;
            fsmp.Length = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 should no longer be found: the delete
            // was applied during the merge
            IndexReader r2 = writer.Reader;
            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsNull(id2docs);
            r2.Dispose();

            /*
            // added docs are in the ram buffer
            for (int x = 15; x < 20; x++)
            {
                writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
            }
            Assert.IsTrue(writer.numRamDocs() > 0);

            // delete from the ram buffer
            writer.DeleteDocuments(new Term("id", Integer.toString(13)));

            Term id3 = new Term("id", Integer.toString(3));

            // delete from the 1st segment
            writer.DeleteDocuments(id3);

            Assert.IsTrue(writer.numRamDocs() > 0);

            // Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);

            // we cause a merge to happen
            fsmp.DoMerge = true;
            fsmp.Start = 0;
            fsmp.Length = 2;

            SegmentInfo info0 = writer.SegmentInfos.Info(0);
            SegmentInfo info1 = writer.SegmentInfos.Info(1);

            writer.MaybeMerge();

            // there should still be docs in RAM
            Assert.IsTrue(writer.numRamDocs() > 0);

            // assert we've merged the 1 and 2 segments
            // and still have a segment leftover == 2
            Assert.AreEqual(2, writer.SegmentInfos.Size());
            Assert.IsFalse(segThere(info0, writer.SegmentInfos));
            Assert.IsFalse(segThere(info1, writer.SegmentInfos));

            IndexReader r = writer.GetReader();
            IndexReader r1 = r.getSequentialSubReaders()[0];
            printDelDocs(r1.GetLiveDocs());
            int[] docs = toDocsArray(id3, null, r);
            // there shouldn't be any docs for id:3
            Assert.IsTrue(docs == null);
            r.Dispose();

            part2(writer, fsmp);
            */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
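
The public-API equivalent of the sequence above, as a hedged sketch (assumed, not from the original tests): buffered deletes are applied when the affected segments are merged, so ForceMerge(1) purges the deleted document; the NumDocs spelling is an assumption based on this port's conventions.

            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(new LogDocMergePolicy());
            IndexWriter writer = new IndexWriter(dir, iwc);
            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", i.ToString(), Field.Store.NO));
                writer.AddDocument(doc);
            }
            writer.Commit();
            writer.DeleteDocuments(new Term("id", "3"));
            writer.ForceMerge(1); // rewrites the segment, applying the buffered delete
            IndexReader r = writer.Reader;
            Assert.AreEqual(9, r.NumDocs); // NumDocs assumed to be a property in this port
            r.Dispose();
            writer.Dispose();
            dir.Dispose();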
Example #35
        /// <summary>
        /// create a new index writer config with random defaults using the specified random </summary>
        public static IndexWriterConfig NewIndexWriterConfig(Random r, LuceneVersion v, Analyzer a)
        {
            IndexWriterConfig c = new IndexWriterConfig(v, a);
            c.SetSimilarity(ClassEnvRule.Similarity);
            if (VERBOSE)
            {
                // Even though TestRuleSetupAndRestoreClassEnv calls
                // InfoStream.setDefault, we do it again here so that
                // the PrintStreamInfoStream.messageID increments so
                // that when there are separate instances of
                // IndexWriter created we see "IW 0", "IW 1", "IW 2",
                // ... instead of just always "IW 0":
                c.InfoStream = new TestRuleSetupAndRestoreClassEnv.ThreadNameFixingPrintStreamInfoStream(Console.Out);
            }

            if (r.NextBoolean())
            {
                c.SetMergeScheduler(new SerialMergeScheduler());
            }
            else if (Rarely(r))
            {
                int maxThreadCount = TestUtil.NextInt(r, 1, 4);
                int maxMergeCount = TestUtil.NextInt(r, maxThreadCount, maxThreadCount + 4);
                ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
                cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
                c.SetMergeScheduler(cms);
            }
            if (r.NextBoolean())
            {
                if (Rarely(r))
                {
                    // crazy value
                    c.SetMaxBufferedDocs(TestUtil.NextInt(r, 2, 15));
                }
                else
                {
                    // reasonable value
                    c.SetMaxBufferedDocs(TestUtil.NextInt(r, 16, 1000));
                }
            }
            if (r.NextBoolean())
            {
                if (Rarely(r))
                {
                    // crazy value
                    c.SetTermIndexInterval(r.NextBoolean() ? TestUtil.NextInt(r, 1, 31) : TestUtil.NextInt(r, 129, 1000));
                }
                else
                {
                    // reasonable value
                    c.SetTermIndexInterval(TestUtil.NextInt(r, 32, 128));
                }
            }
            if (r.NextBoolean())
            {
                int maxNumThreadStates = Rarely(r) ? TestUtil.NextInt(r, 5, 20) /* crazy value */ : TestUtil.NextInt(r, 1, 4) /* reasonable value */;

                if (Rarely(r))
                {
                    // Look up the SetIndexerThreadPool method via reflection
                    // (it was package-private in the Java original) and
                    // install a random thread pool:
                    MethodInfo setIndexerThreadPoolMethod = typeof(IndexWriterConfig).GetMethod("SetIndexerThreadPool", new Type[] { typeof(DocumentsWriterPerThreadPool) });
                    ConstructorInfo ctor = typeof(RandomDocumentsWriterPerThreadPool).GetConstructor(new[] { typeof(int), typeof(Random) });
                    setIndexerThreadPoolMethod.Invoke(c, new[] { ctor.Invoke(new object[] { maxNumThreadStates, r }) });
                }
                else
                {
                    // random thread pool
                    c.SetMaxThreadStates(maxNumThreadStates);
                }
            }

            c.SetMergePolicy(NewMergePolicy(r));

            if (Rarely(r))
            {
                c.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.InfoStream));
            }
            c.SetUseCompoundFile(r.NextBoolean());
            c.SetReaderPooling(r.NextBoolean());
            c.SetReaderTermsIndexDivisor(TestUtil.NextInt(r, 1, 4));
            c.SetCheckIntegrityAtMerge(r.NextBoolean());
            return c;
        }
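
A hedged usage sketch (not part of the original helper): callers pass their own Random so a failing run can be reproduced from its seed, and any setting can still be overridden after the call; NoMergePolicy.COMPOUND_FILES is assumed available as in Lucene 4.x.

            IndexWriterConfig conf = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            // overrides applied after the call win over the randomized defaults:
            conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
            IndexWriter writer = new IndexWriter(NewDirectory(), conf);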
Example #36
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random());
            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Iwc.SetMergePolicy(NewLogMergePolicy()); // the skipping checks below rely on insertion-ordered docids, which log merges preserve
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors = true;
                ft.StoreTermVectorOffsets = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();
            }

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.IntToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int freq = dp.Freq();
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset();
                        Debug.Assert(start >= 0);
                        int end = dp.EndOffset();
                        Debug.Assert(end >= 0 && end >= start);
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.Payload);
                            BytesRef payload = dp.Payload;
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:"));
                        } // note: withPayloads=false doesn't necessarily mean we don't have payloads from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq();
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset();
                    Debug.Assert(start >= 0);
                    int end = dp.EndOffset();
                    Debug.Assert(end >= 0 && end >= start);
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred"));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.Payload);
                        BytesRef payload = dp.Payload;
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:"));
                    } // note: withPayloads=false doesn't necessarily mean we don't have payloads from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef(i.ToString()), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestMaxMergeCount()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            int maxMergeCount = TestUtil.NextInt(Random(), 1, 5);
            int maxMergeThreads = TestUtil.NextInt(Random(), 1, maxMergeCount);
            CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount);
            AtomicInteger runningMergeCount = new AtomicInteger(0);
            AtomicBoolean failed = new AtomicBoolean();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
            }

            ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed);
            cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
            iwc.SetMergeScheduler(cms);
            iwc.SetMaxBufferedDocs(2);

            TieredMergePolicy tmp = new TieredMergePolicy();
            iwc.SetMergePolicy(tmp);
            tmp.MaxMergeAtOnce = 2;
            tmp.SegmentsPerTier = 2;

            IndexWriter w = new IndexWriter(dir, iwc);
            Document doc = new Document();
            doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED));
            while (enoughMergesWaiting.CurrentCount != 0 && !failed.Get())
            {
                for (int i = 0; i < 10; i++)
                {
                    w.AddDocument(doc);
                }
            }
            w.Dispose(false);
            dir.Dispose();
        }
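
The ConcurrentMergeSchedulerAnonymousInnerClassHelper used above is not shown here. As a hedged sketch of what such a test scheduler might look like (assumed, not the actual helper; DoMerge being protected virtual and the AtomicInteger methods are assumptions based on this port's conventions):

        private sealed class CountingMergeScheduler : ConcurrentMergeScheduler
        {
            private readonly AtomicInteger runningMergeCount;

            public CountingMergeScheduler(AtomicInteger runningMergeCount)
            {
                this.runningMergeCount = runningMergeCount;
            }

            protected override void DoMerge(MergePolicy.OneMerge merge)
            {
                runningMergeCount.IncrementAndGet(); // merge thread entering
                try
                {
                    base.DoMerge(merge);
                }
                finally
                {
                    runningMergeCount.DecrementAndGet(); // merge thread leaving
                }
            }
        }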
        private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
        {
            conf.SetMergePolicy(new LogDocMergePolicy());
            IndexWriter writer = new IndexWriter(dir, conf);
            return writer;
        }
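
Note that NewWriter unconditionally replaces whatever merge policy the caller configured. A hedged usage sketch:

        Directory dir = NewDirectory();
        IndexWriter writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        // the writer merges with LogDocMergePolicy regardless of what the config held before
        writer.Dispose();
        dir.Dispose();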
        /// <summary>
        /// Override this to customize index settings, e.g. which
        /// codec to use. 
        /// </summary>
        protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion, 
            Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode)
        {
            IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer);
            iwc.SetCodec(new Lucene46Codec());
            iwc.SetOpenMode(openMode);

            // This way all merged segments will be sorted at
            // merge time, allowing for per-segment early termination
            // when those segments are searched:
            iwc.SetMergePolicy(new SortingMergePolicy(iwc.MergePolicy, SORT));

            return iwc;
        }
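
Because the method is virtual, a subclass can adjust other settings while keeping the sorted-merge behavior. A hedged, illustrative override (not from the original source):

        protected internal override IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion,
            Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode)
        {
            IndexWriterConfig iwc = base.GetIndexWriterConfig(matchVersion, indexAnalyzer, openMode);
            iwc.SetRAMBufferSizeMB(64); // illustrative: a larger RAM buffer for bulk builds
            return iwc;
        }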
Example #40
        private static IndexWriter GetWriter(Directory directory)
        {
            MergePolicy policy = new LogByteSizeMergePolicy();
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            conf.SetMergePolicy(policy);
            conf.SetOpenMode(OpenMode_e.CREATE_OR_APPEND);

            IndexWriter writer = new IndexWriter(directory, conf);

            return writer;
        }
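
A hedged usage sketch: with OpenMode_e.CREATE_OR_APPEND the first call against an empty directory creates the index, and later calls append to it.

            using (Directory directory = NewDirectory())
            {
                using (IndexWriter writer = GetWriter(directory))
                {
                    writer.AddDocument(new Document());
                    writer.Commit();
                }
                // reopening appends rather than recreating:
                using (IndexWriter writer2 = GetWriter(directory))
                {
                    writer2.AddDocument(new Document());
                }
            }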