Expert: a MergePolicy determines the sequence of primitive merge operations to be used for overall merge and optimize operations.

Whenever the segments in an index have been altered by {@link IndexWriter}, whether through the addition of a newly flushed segment, the addition of many segments from addIndexes* calls, or a previous merge that may now need to cascade, {@link IndexWriter} invokes {@link #findMerges} to give the MergePolicy a chance to pick merges that are now required. This method returns a {@link MergeSpecification} instance describing the set of merges that should be done, or null if no merges are necessary. When IndexWriter.optimize is called, it calls {@link #findMergesForOptimize} and the MergePolicy should then return the necessary merges.

Note that the policy can return more than one merge at a time. In this case, if the writer is using {@link SerialMergeScheduler}, the merges will be run sequentially, but if it is using {@link ConcurrentMergeScheduler} they will be run concurrently.

The default MergePolicy is {@link LogByteSizeMergePolicy}.

NOTE: This API is new and still experimental (subject to change suddenly in the next release).
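
For orientation before the examples, here is a minimal configuration sketch, not taken from the examples below, showing how a merge policy and merge scheduler are typically wired into an IndexWriter via IndexWriterConfig. It assumes the Lucene.NET 4.8 style API used throughout the examples (SetMergePolicy, SetMergeScheduler, LogByteSizeMergePolicy.MaxMergeMB, MergePolicy.NoCFSRatio); the StandardAnalyzer, the LuceneVersion.LUCENE_48 constant, and the specific size values are illustrative choices, not recommendations.

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

public static class MergePolicyConfigSketch
{
    // Sketch only: opens a writer with an explicitly configured merge policy
    // and merge scheduler. All concrete values below are illustrative.
    public static IndexWriter OpenWriter(Directory dir)
    {
        var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);

        var mergePolicy = new LogByteSizeMergePolicy
        {
            MaxMergeMB = 512.0, // segments larger than this (in MB) are left out of normal merges
            NoCFSRatio = 0.0    // never pack merged segments into compound files
        };

        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
            .SetMergePolicy(mergePolicy)
            // The scheduler decides how the merges returned by the policy are run:
            // ConcurrentMergeScheduler uses background threads, while
            // SerialMergeScheduler would run them one at a time on the calling thread.
            .SetMergeScheduler(new ConcurrentMergeScheduler())
            .SetOpenMode(OpenMode.CREATE);

        return new IndexWriter(dir, config);
    }
}

Substituting SerialMergeScheduler in the call above is all it takes to make the merges selected by the policy run sequentially, matching the scheduler behavior described in the note above.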

Example #1
        public virtual void Test()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(new ConcurrentMergeScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);

            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.OmitNorms    = true;
            ft.IndexOptions = IndexOptions.DOCS_ONLY;
            Field field = new Field("field", new MyTokenStream(), ft);

            doc.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;

            for (int i = 0; i < numDocs; i++)
            {
                w.AddDocument(doc);
                if (Verbose && i % 100000 == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            dir.Dispose();
        }
Example #2
        public virtual void TestUpgradeOldSingleSegmentIndexWithAdditions()
        {
            foreach (string name in OldSingleSegmentNames)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("testUpgradeOldSingleSegmentIndexWithAdditions: index=" + name);
                }
                Directory dir = NewDirectory(OldIndexDirs[name]);

                Assert.AreEqual(1, GetNumberOfSegments(dir), "Original index must be single segment");

                // create a bunch of dummy segments
                int          id     = 40;
                RAMDirectory ramDir = new RAMDirectory();
                for (int i = 0; i < 3; i++)
                {
                    // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
                    MergePolicy       mp  = Random().NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy();
                    IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(mp);
                    IndexWriter       w   = new IndexWriter(ramDir, iwc);
                    // add a few more docs:
                    for (int j = 0; j < RANDOM_MULTIPLIER * Random().Next(30); j++)
                    {
                        AddDoc(w, id++);
                    }
                    w.Dispose(false);
                }

                // add dummy segments (which are all in current
                // version) to single segment index
                MergePolicy       mp_  = Random().NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy();
                IndexWriterConfig iwc_ = (new IndexWriterConfig(TEST_VERSION_CURRENT, null)).SetMergePolicy(mp_);
                IndexWriter       iw   = new IndexWriter(dir, iwc_);
                iw.AddIndexes(ramDir);
                iw.Dispose(false);

                // determine count of segments in modified index
                int origSegCount = GetNumberOfSegments(dir);

                NewIndexUpgrader(dir).Upgrade();

                int segCount = CheckAllSegmentsUpgraded(dir);
                Assert.AreEqual(origSegCount, segCount, "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged");

                dir.Dispose();
            }
        }
Example #3
		internal int docShift; // total # deleted docs that were compacted by this merge
		
		public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
		{
			this.docMaps = docMaps;
			SegmentInfo firstSegment = merge.segments.Info(0);
			int i = 0;
			while (true)
			{
				SegmentInfo info = infos.Info(i);
				if (info.Equals(firstSegment))
					break;
				minDocID += info.docCount;
				i++;
			}
			
			int numDocs = 0;
			for (int j = 0; j < docMaps.Length; i++, j++)
			{
				numDocs += infos.Info(i).docCount;
				System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
			}
			maxDocID = minDocID + numDocs;
			
			starts = new int[docMaps.Length];
			newStarts = new int[docMaps.Length];
			
			starts[0] = minDocID;
			newStarts[0] = minDocID;
			for (i = 1; i < docMaps.Length; i++)
			{
				int lastDocCount = merge.segments.Info(i - 1).docCount;
				starts[i] = starts[i - 1] + lastDocCount;
				newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
			}
			docShift = numDocs - mergedDocCount;
			
			// There are rare cases when docShift is 0.  It happens
			// if you try to delete a docID that's out of bounds,
			// because the SegmentReader still allocates deletedDocs
			// and pretends it has deletions ... so we can't make
			// this assert here
			// assert docShift > 0;
			
			// Make sure it all adds up:
			System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts[docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts[docMaps.Length - 1]));
		}
Example #4
        public virtual void TestLiveChangeToCFS()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy(true));
            // Start false:
            iwc.SetUseCompoundFile(false);
            iwc.MergePolicy.NoCFSRatio = 0.0d;
            IndexWriter w = new IndexWriter(dir, iwc);

            // Change to true:
            w.Config.SetUseCompoundFile(true);

            Document doc = new Document();

            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit");

            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            w.ForceMerge(1);
            w.Commit();

            // no compound files after merge
            Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge");

            MergePolicy lmp = w.Config.MergePolicy;

            lmp.NoCFSRatio          = 1.0;
            lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;

            w.AddDocument(doc);
            w.ForceMerge(1);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge");
            w.Dispose();
            dir.Dispose();
        }
Example #5
        public virtual void TestMergeStability()
        {
            Directory dir = NewDirectory();
            // do not use newMergePolicy that might return a MockMergePolicy that ignores the no-CFS ratio
            MergePolicy mp = NewTieredMergePolicy();

            mp.NoCFSRatio = 0;
            var cfg = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(false).SetMergePolicy(mp);

            using (var w = new RandomIndexWriter(Random(), dir, cfg))
            {
                var numDocs = AtLeast(500);
                for (var i = 0; i < numDocs; ++i)
                {
                    var d = new Document();
                    AddRandomFields(d);
                    w.AddDocument(d);
                }
                w.ForceMerge(1);
                w.Commit();
            }
            IndexReader reader = DirectoryReader.Open(dir);

            Directory dir2 = NewDirectory();

            mp            = NewTieredMergePolicy();
            mp.NoCFSRatio = 0;
            cfg           = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(false).SetMergePolicy(mp);

            using (var w = new RandomIndexWriter(Random(), dir2, cfg))
            {
                w.AddIndexes(reader);
                w.Commit();
            }

            assertEquals(BytesUsedByExtension(dir), BytesUsedByExtension(dir2));

            reader.Dispose();
            dir.Dispose();
            dir2.Dispose();
        }
Example #6
        public virtual void TestKeepNoneOnInitDeletionPolicy()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = NewDirectory();

                IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy(this)).SetMaxBufferedDocs(10);
                MergePolicy       mp   = conf.MergePolicy;
                mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                IndexWriter writer = new IndexWriter(dir, conf);
                KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy;
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                }
                writer.Dispose();

                conf          = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy);
                mp            = conf.MergePolicy;
                mp.NoCFSRatio = 1.0;
                writer        = new IndexWriter(dir, conf);
                policy        = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy;
                writer.ForceMerge(1);
                writer.Dispose();

                Assert.AreEqual(2, policy.NumOnInit);
                // If we are not auto committing then there should
                // be exactly 2 commits (one per close above):
                Assert.AreEqual(2, policy.NumOnCommit);

                // Simplistic check: just verify the index is in fact
                // readable:
                IndexReader reader = DirectoryReader.Open(dir);
                reader.Dispose();

                dir.Dispose();
            }
        }
Example #7
        public virtual void TestClone()
        {
            IndexWriterConfig conf  = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriterConfig clone = (IndexWriterConfig)conf.Clone();

            // Make sure parameters that can't be reused are cloned
            IndexDeletionPolicy delPolicy      = conf.IndexDeletionPolicy;
            IndexDeletionPolicy delPolicyClone = clone.IndexDeletionPolicy;

            Assert.IsTrue(delPolicy.GetType() == delPolicyClone.GetType() && (delPolicy != delPolicyClone || delPolicy.Clone() == delPolicyClone.Clone()));

            FlushPolicy flushPolicy      = conf.FlushPolicy;
            FlushPolicy flushPolicyClone = clone.FlushPolicy;

            Assert.IsTrue(flushPolicy.GetType() == flushPolicyClone.GetType() && (flushPolicy != flushPolicyClone || flushPolicy.Clone() == flushPolicyClone.Clone()));

            DocumentsWriterPerThreadPool pool      = conf.IndexerThreadPool;
            DocumentsWriterPerThreadPool poolClone = clone.IndexerThreadPool;

            Assert.IsTrue(pool.GetType() == poolClone.GetType() && (pool != poolClone || pool.Clone() == poolClone.Clone()));

            MergePolicy mergePolicy      = conf.MergePolicy;
            MergePolicy mergePolicyClone = clone.MergePolicy;

            Assert.IsTrue(mergePolicy.GetType() == mergePolicyClone.GetType() && (mergePolicy != mergePolicyClone || mergePolicy.Clone() == mergePolicyClone.Clone()));

            IMergeScheduler mergeSched      = conf.MergeScheduler;
            IMergeScheduler mergeSchedClone = clone.MergeScheduler;

            Assert.IsTrue(mergeSched.GetType() == mergeSchedClone.GetType() && (mergeSched != mergeSchedClone || mergeSched.Clone() == mergeSchedClone.Clone()));

            conf.SetMergeScheduler(new SerialMergeScheduler());
#if !FEATURE_CONCURRENTMERGESCHEDULER
            Assert.AreEqual(typeof(TaskMergeScheduler), clone.MergeScheduler.GetType());
#else
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), clone.MergeScheduler.GetType());
#endif
        }
Example #8
        private void AssertSetters(MergePolicy lmp)
        {
            lmp.MaxCFSSegmentSizeMB = 2.0;
            Assert.AreEqual(2.0, lmp.MaxCFSSegmentSizeMB, EPSILON);

            lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue);

            lmp.MaxCFSSegmentSizeMB = long.MaxValue / 1024 / 1024.0;
            Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue);

            try
            {
                lmp.MaxCFSSegmentSizeMB = -2.0;
                Assert.Fail("Didn't throw IllegalArgumentException");
            }
            catch (System.ArgumentException iae)
            {
                // pass
            }

            // TODO: Add more checks for other non-double setters!
        }
Example #9
        private void AssertSetters(MergePolicy lmp)
        {
            lmp.MaxCFSSegmentSizeMB = 2.0;
            Assert.AreEqual(2.0, lmp.MaxCFSSegmentSizeMB, EPSILON);

            lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue);

            lmp.MaxCFSSegmentSizeMB = long.MaxValue / 1024 / 1024.0;
            Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue);

            try
            {
                lmp.MaxCFSSegmentSizeMB = -2.0;
                Assert.Fail("Didn't throw IllegalArgumentException");
            }
            catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
            {
                // pass
            }

            // TODO: Add more checks for other non-double setters!
        }
Example #10
        public virtual void TestMergeStability()
        {
            using Directory dir = NewDirectory();
            // do not use newMergePolicy that might return a MockMergePolicy that ignores the no-CFS ratio
            MergePolicy mp = NewTieredMergePolicy();

            mp.NoCFSRatio = 0;
            var cfg = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetUseCompoundFile(false).SetMergePolicy(mp);

            using (var w = new RandomIndexWriter(Random, dir, cfg))
            {
                var numDocs = AtLeast(500);
                for (var i = 0; i < numDocs; ++i)
                {
                    var d = new Document();
                    AddRandomFields(d);
                    w.AddDocument(d);
                }
                w.ForceMerge(1);
                w.Commit();
            }
            using IndexReader reader = DirectoryReader.Open(dir);
            using Directory dir2     = NewDirectory();
            mp            = NewTieredMergePolicy();
            mp.NoCFSRatio = 0;
            cfg           = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetUseCompoundFile(false).SetMergePolicy(mp);

            using (var w = new RandomIndexWriter(Random, dir2, cfg))
            {
                w.AddIndexes(reader);
                w.Commit();
            }

            // LUCENENET: We need to explicitly call Equals() and use HashMap in order to ensure our
            // equality check is done correctly. Calling Assert.AreEqual doesn't guarantee this is done.
            Assert.True(BytesUsedByExtension(dir).Equals(BytesUsedByExtension(dir2)));
        }
Example #11
        public virtual void Test2BTerms_Mem()
        {
            if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                throw RuntimeException.Create("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }
            dir.CheckIndexOnDispose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(new ConcurrentMergeScheduler())
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(OpenMode.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random, TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;                                                          // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                }
                savedTerms = ts.savedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms is null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new JCG.List <BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
Example #12
        private void HandleMergeException(Exception t, MergePolicy.OneMerge merge)
        {
            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "handleMergeException: merge=" + SegString(merge.Segments) + " exc=" + t);
            }

            // Set the exception on the merge, so if
            // forceMerge is waiting on us it sees the root
            // cause exception:
            merge.Exception = t;
            AddMergeException(merge);

            if ((t as MergePolicy.MergeAbortedException) != null)
            {
                // We can ignore this exception (it happens when
                // close(false) or rollback is called), unless the
                // merge involves segments from external directories,
                // in which case we must throw it so, for example, the
                // rollbackTransaction code in addIndexes* is
                // executed.
                if (merge.IsExternal)
                {
                    throw t;
                }
            }
            else
            {
                IOUtils.ReThrow(t);
            }
        }
Example #13
		internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
		{
			InitBlock();
			directory = writer.GetDirectory();
			segment = name;
			if (merge != null)
			{
				checkAbort = new CheckAbort(merge, directory);
			}
			else
			{
				checkAbort = new AnonymousClassCheckAbort1(this, null, null);
			}
			termIndexInterval = writer.GetTermIndexInterval();
		}
Example #14
		internal virtual void  AddMergeException(MergePolicy.OneMerge merge)
		{
			lock (this)
			{
				System.Diagnostics.Debug.Assert(merge.GetException() != null);
				if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen)
					mergeExceptions.Add(merge);
			}
		}
Example #15
 private void EnsureValidMerge(MergePolicy.OneMerge merge)
 {
     lock (this)
     {
         foreach (SegmentCommitInfo info in merge.Segments)
         {
             if (!segmentInfos.Contains(info))
             {
                 throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.Info.Name + ") that is not in the current index " + SegString(), directory);
             }
         }
     }
 }
Example #16
        public virtual void TestKeepLastNDeletionPolicyWithCreates()
        {
            const int N = 10;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory         dir  = NewDirectory();
                IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepLastNDeletionPolicy(this, N)).SetMaxBufferedDocs(10);
                MergePolicy       mp   = conf.MergePolicy;
                mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                IndexWriter             writer = new IndexWriter(dir, conf);
                KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                writer.Dispose();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    conf          = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10);
                    mp            = conf.MergePolicy;
                    mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                    writer        = new IndexWriter(dir, conf);
                    policy        = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    for (int j = 0; j < 17; j++)
                    {
                        AddDocWithID(writer, i * (N + 1) + j);
                    }
                    // this is a commit
                    writer.Dispose();
                    conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetIndexDeletionPolicy(policy).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
                    writer = new IndexWriter(dir, conf);
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    writer.DeleteDocuments(new Term("id", "" + (i * (N + 1) + 3)));
                    // this is a commit
                    writer.Dispose();
                    IndexReader   reader   = DirectoryReader.Open(dir);
                    IndexSearcher searcher = NewSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000).ScoreDocs;
                    Assert.AreEqual(16, hits.Length);
                    reader.Dispose();

                    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy));
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    // this will not commit: there are no changes
                    // pending because we opened for "create":
                    writer.Dispose();
                }

                Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnInit);
                Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnCommit);

                IndexReader   rwReader  = DirectoryReader.Open(dir);
                IndexSearcher searcher_ = NewSearcher(rwReader);
                ScoreDoc[]    hits_     = searcher_.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(0, hits_.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                long gen = SegmentInfos.GetLastCommitGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 0;

                rwReader.Dispose();

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = DirectoryReader.Open(dir);

                        // Work backwards in commits on what the expected
                        // count should be.
                        searcher_ = NewSearcher(reader);
                        hits_     = searcher_.Search(query, null, 1000).ScoreDocs;
                        Assert.AreEqual(expectedCount, hits_.Length);
                        if (expectedCount == 0)
                        {
                            expectedCount = 16;
                        }
                        else if (expectedCount == 16)
                        {
                            expectedCount = 17;
                        }
                        else if (expectedCount == 17)
                        {
                            expectedCount = 0;
                        }
                        reader.Dispose();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last " + N);
                        }
                    }
                    catch (IOException /*e*/)
                    {
                        if (i != N)
                        {
                            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Dispose();
            }
        }
Example #17
		/// <summary>Called whenever a merge has completed and the merged segments had deletions </summary>
		internal void  RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
		{
			lock (this)
			{
				// The merged segments had no deletes, so docIDs did not change and we have nothing to do:
				if (docMaps == null)
					return;
				MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount);
				deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
				deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
				flushedDocCount -= mapper.docShift;
			}
		}
Example #18
        public virtual void TestExpirationTimeDeletionPolicy()
        {
            const double SECONDS = 2.0;

            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(this, dir, SECONDS));
            MergePolicy       mp   = conf.MergePolicy;

            mp.NoCFSRatio = 1.0;
            IndexWriter writer = new IndexWriter(dir, conf);
            ExpirationTimeDeletionPolicy policy     = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy;
            IDictionary <string, string> commitData = new Dictionary <string, string>();

            commitData["commitTime"] = Convert.ToString(Environment.TickCount);
            writer.SetCommitData(commitData);
            writer.Commit();
            writer.Dispose();

            long lastDeleteTime  = 0;
            int  targetNumDelete = TestUtil.NextInt32(Random, 1, 5);

            while (policy.NumDelete < targetNumDelete)
            {
                // Record last time when writer performed deletes of
                // past commits
                lastDeleteTime = Environment.TickCount;
                conf           = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy);
                mp             = conf.MergePolicy;
                mp.NoCFSRatio  = 1.0;
                writer         = new IndexWriter(dir, conf);
                policy         = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy;
                for (int j = 0; j < 17; j++)
                {
                    AddDoc(writer);
                }
                commitData = new Dictionary <string, string>();
                commitData["commitTime"] = Convert.ToString(Environment.TickCount);
                writer.SetCommitData(commitData);
                writer.Commit();
                writer.Dispose();

                Thread.Sleep((int)(1000.0 * (SECONDS / 5.0)));
            }

            // Then simplistic check: just verify that the
            // segments_N's that still exist are in fact within SECONDS
            // seconds of the last one's mod time, and, that I can
            // open a reader on each:
            long gen = SegmentInfos.GetLastCommitGeneration(dir);

            string fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

            dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);

            bool oneSecondResolution = true;

            while (gen > 0)
            {
                try
                {
                    IndexReader reader = DirectoryReader.Open(dir);
                    reader.Dispose();
                    fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

                    // if we are on a filesystem that seems to have only
                    // 1 second resolution, allow +1 second in commit
                    // age tolerance:
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir, fileName);
                    long modTime = Convert.ToInt64(sis.UserData["commitTime"]);
                    oneSecondResolution &= (modTime % 1000) == 0;
                    long leeway = (long)((SECONDS + (oneSecondResolution ? 1.0 : 0.0)) * 1000);

                    Assert.IsTrue(lastDeleteTime - modTime <= leeway, "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted ");
                }
#pragma warning disable 168
                catch (IOException e)
#pragma warning restore 168
                {
                    // OK
                    break;
                }

                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                gen--;
            }

            dir.Dispose();
        }
Example #19
		/// <summary>Does the actual merge, by calling {@link IndexWriter#merge} </summary>
		protected internal virtual void  DoMerge(MergePolicy.OneMerge merge)
		{
			writer.Merge(merge);
		}
Example #20
			public MergeThread(ConcurrentMergeScheduler enclosingInstance, IndexWriter writer, MergePolicy.OneMerge startMerge)
			{
				InitBlock(enclosingInstance);
				this.writer = writer;
				this.startMerge = startMerge;
			}
Example #21
        public virtual void RunTest(string testName)
        {
            m_failed.Value    = (false);
            m_addCount.Value  = 0;
            m_delCount.Value  = 0;
            m_packCount.Value = 0;

            long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            Random random = new J2N.Randomizer(Random.NextInt64());

            using LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            DirectoryInfo tempDir = CreateTempDir(testName);

            m_dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (m_dir is BaseDirectoryWrapper baseDirectoryWrapper)
            {
                baseDirectoryWrapper.CheckIndexOnDispose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TestNightly)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy tieredMergePolicy)
                {
                    //tieredMergePolicy.MaxMergedSegmentMB = 5000.0;
                    tieredMergePolicy.MaxMergedSegmentMB = 2500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
                }
                else if (mp is LogByteSizeMergePolicy logByteSizeMergePolicy)
                {
                    //logByteSizeMergePolicy.MaxMergeMB = 1000.0;
                    logByteSizeMergePolicy.MaxMergeMB = 500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
                }
                else if (mp is LogMergePolicy logMergePolicy)
                {
                    //logMergePolicy.MaxMergeDocs = 100000;
                    logMergePolicy.MaxMergeDocs = 50000; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousClass(this));

            if (Verbose)
            {
                conf.SetInfoStream(new PrintStreamInfoStreamAnonymousClass(Console.Out));
            }
            m_writer = new IndexWriter(m_dir, conf);
            TestUtil.ReduceOpenFiles(m_writer);

            TaskScheduler es = LuceneTestCase.Random.NextBoolean() ? null : TaskScheduler.Default;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 2, 4);

            //int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 300 : RandomMultiplier;
            // LUCENENET specific - lowered from 300 to 150 to reduce total time on Nightly
            // build to less than 1 hour.
            int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 150 : RandomMultiplier;

            ISet <string>             delIDs     = new ConcurrentHashSet <string>();
            ISet <string>             delPackIDs = new ConcurrentHashSet <string>();
            ConcurrentQueue <SubDocs> allSubDocs = new ConcurrentQueue <SubDocs>();

            long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (RUN_TIME_SEC * 1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            ThreadJob[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (Verbose)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (Verbose)
            {
                Console.WriteLine("TEST: all searching done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: done join indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]; addCount=" + m_addCount + " delCount=" + m_delCount); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }

            IndexSearcher s = GetFinalSearcher();

            if (Verbose)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            assertFalse(m_failed);

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                assertEquals(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            assertEquals(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            assertEquals(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                assertEquals(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        assertEquals(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"), CultureInfo.InvariantCulture);

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = id.ToString(CultureInfo.InvariantCulture);
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + Collections.ToString(delIDs));
                        doFail = true;
                    }
                }
            }
            assertFalse(doFail);

            assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, s.IndexReader.NumDocs);
            ReleaseSearcher(s);

            m_writer.Commit();

            assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, m_writer.NumDocs);

            DoClose();
            m_writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(m_dir);
            m_dir.Dispose();
            //System.IO.Directory.Delete(tempDir.FullName, true);
            TestUtil.Rm(tempDir);

            if (Verbose)
            {
                Console.WriteLine("TEST: done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }
        }
Example #22
        private bool CommitMerge(MergePolicy.OneMerge merge, MergeState mergeState)
        {
            lock (this)
            {
                Debug.Assert(TestPoint("startCommitMerge"));

                if (HitOOM)
                {
                    throw new InvalidOperationException("this writer hit an OutOfMemoryError; cannot complete merge");
                }

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "commitMerge: " + SegString(merge.Segments) + " index=" + SegString());
                }

                Debug.Assert(merge.RegisterDone);

                // If merge was explicitly aborted, or, if rollback() or
                // rollbackTransaction() had been called since our merge
                // started (which results in an unqualified
                // deleter.refresh() call that will remove any index
                // file that current segments does not reference), we
                // abort this merge
                if (merge.Aborted)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "commitMerge: skip: it was aborted");
                    }
                    // In case we opened and pooled a reader for this
                    // segment, drop it now. This ensures that we close
                    // the reader before trying to delete any of its
                    // files. This is not a very big deal, since this
                    // reader will never be used by any NRT reader, and
                    // another thread is currently running close(false)
                    // so it will be dropped shortly anyway, but not
                    // doing this makes MockDirWrapper angry in
                    // TestNRTThreads (LUCENE-5434):
                    readerPool.Drop(merge.Info_Renamed);
                    Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                    return false;
                }

                ReadersAndUpdates mergedUpdates = merge.Info_Renamed.Info.DocCount == 0 ? null : CommitMergedDeletesAndUpdates(merge, mergeState);
                //    System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMerge: mergedDeletes=" + mergedDeletes);

                // If the doc store we are using has been closed and
                // is now in compound format (but wasn't when we
                // started), then we will switch to the compound
                // format as well:

                Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed));

                bool allDeleted = merge.Segments.Count == 0 || merge.Info_Renamed.Info.DocCount == 0 || (mergedUpdates != null && mergedUpdates.PendingDeleteCount == merge.Info_Renamed.Info.DocCount);

                if (infoStream.IsEnabled("IW"))
                {
                    if (allDeleted)
                    {
                        infoStream.Message("IW", "merged segment " + merge.Info_Renamed + " is 100% deleted" + (KeepFullyDeletedSegments_Renamed ? "" : "; skipping insert"));
                    }
                }

                bool dropSegment = allDeleted && !KeepFullyDeletedSegments_Renamed;

                // If we merged no segments then we better be dropping
                // the new segment:
                Debug.Assert(merge.Segments.Count > 0 || dropSegment);

                Debug.Assert(merge.Info_Renamed.Info.DocCount != 0 || KeepFullyDeletedSegments_Renamed || dropSegment);

                if (mergedUpdates != null)
                {
                    bool success = false;
                    try
                    {
                        if (dropSegment)
                        {
                            mergedUpdates.DropChanges();
                        }
                        // Pass false for assertInfoLive because the merged
                        // segment is not yet live (only below do we commit it
                        // to the segmentInfos):
                        readerPool.Release(mergedUpdates, false);
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            mergedUpdates.DropChanges();
                            readerPool.Drop(merge.Info_Renamed);
                        }
                    }
                }

                // Must do this after readerPool.release, in case an
                // exception is hit e.g. writing the live docs for the
                // merge segment, in which case we need to abort the
                // merge:
                segmentInfos.ApplyMergeChanges(merge, dropSegment);

                if (dropSegment)
                {
                    Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed));
                    readerPool.Drop(merge.Info_Renamed);
                    Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                }

                bool success_ = false;
                try
                {
                    // Must close before checkpoint, otherwise IFD won't be
                    // able to delete the held-open files from the merge
                    // readers:
                    CloseMergeReaders(merge, false);
                    success_ = true;
                }
                finally
                {
                    // Must note the change to segmentInfos so any commits
                    // in-flight don't lose it (IFD will incRef/protect the
                    // new files we created):
                    if (success_)
                    {
                        Checkpoint();
                    }
                    else
                    {
                        try
                        {
                            Checkpoint();
                        }
                        catch (Exception)
                        {
                            // Ignore so we keep throwing original exception.
                        }
                    }
                }

                Deleter.DeletePendingFiles();

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "after commitMerge: " + SegString());
                }

                if (merge.MaxNumSegments != -1 && !dropSegment)
                {
                    // cascade the forceMerge:
                    if (!SegmentsToMerge.ContainsKey(merge.Info_Renamed))
                    {
                        SegmentsToMerge[merge.Info_Renamed] = false;
                    }
                }

                return true;
            }
        }
Example #23
        /// <summary>
        /// Carefully merges deletes and updates for the segments we just merged. This
        /// is tricky because, although merging will clear all deletes (compacts the
        /// documents) and compact all the updates, new deletes and updates may have
        /// been flushed to the segments since the merge was started. This method
        /// "carries over" such new deletes and updates onto the newly merged segment,
        /// and saves the resulting deletes and updates files (incrementing the delete
        /// and DV generations for merge.info). If no deletes were flushed, no new
        /// deletes file is saved.
        /// </summary>
        private ReadersAndUpdates CommitMergedDeletesAndUpdates(MergePolicy.OneMerge merge, MergeState mergeState)
        {
            lock (this)
            {
                Debug.Assert(TestPoint("startCommitMergeDeletes"));

                IList<SegmentCommitInfo> sourceSegments = merge.Segments;

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "commitMergeDeletes " + SegString(merge.Segments));
                }

                // Carefully merge deletes that occurred after we
                // started merging:
                int docUpto = 0;
                long minGen = long.MaxValue;

                // Lazy init (only when we find a delete to carry over):
                MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates();
                DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container();

                for (int i = 0; i < sourceSegments.Count; i++)
                {
                    SegmentCommitInfo info = sourceSegments[i];
                    minGen = Math.Min(info.BufferedDeletesGen, minGen);
                    int docCount = info.Info.DocCount;
                    Bits prevLiveDocs = merge.Readers[i].LiveDocs;
                    ReadersAndUpdates rld = readerPool.Get(info, false);
                    // We hold a ref so it should still be in the pool:
                    Debug.Assert(rld != null, "seg=" + info.Info.Name);
                    Bits currentLiveDocs = rld.LiveDocs;
                    IDictionary<string, DocValuesFieldUpdates> mergingFieldUpdates = rld.MergingFieldUpdates;
                    string[] mergingFields;
                    DocValuesFieldUpdates[] dvFieldUpdates;
                    DocValuesFieldUpdates.Iterator[] updatesIters;
                    if (mergingFieldUpdates.Count == 0)
                    {
                        mergingFields = null;
                        updatesIters = null;
                        dvFieldUpdates = null;
                    }
                    else
                    {
                        mergingFields = new string[mergingFieldUpdates.Count];
                        dvFieldUpdates = new DocValuesFieldUpdates[mergingFieldUpdates.Count];
                        updatesIters = new DocValuesFieldUpdates.Iterator[mergingFieldUpdates.Count];
                        int idx = 0;
                        foreach (KeyValuePair<string, DocValuesFieldUpdates> e in mergingFieldUpdates)
                        {
                            string field = e.Key;
                            DocValuesFieldUpdates updates = e.Value;
                            mergingFields[idx] = field;
                            dvFieldUpdates[idx] = mergedDVUpdates.GetUpdates(field, updates.Type);
                            if (dvFieldUpdates[idx] == null)
                            {
                                dvFieldUpdates[idx] = mergedDVUpdates.NewUpdates(field, updates.Type, mergeState.SegmentInfo.DocCount);
                            }
                            updatesIters[idx] = updates.GetIterator();
                            updatesIters[idx].NextDoc(); // advance to first update doc
                            ++idx;
                        }
                    }
                    //      System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: info=" + info + ", mergingUpdates=" + mergingUpdates);

                    if (prevLiveDocs != null)
                    {
                        // If we had deletions on starting the merge we must
                        // still have deletions now:
                        Debug.Assert(currentLiveDocs != null);
                        Debug.Assert(prevLiveDocs.Length() == docCount);
                        Debug.Assert(currentLiveDocs.Length() == docCount);

                        // There were deletes on this segment when the merge
                        // started.  The merge has collapsed away those
                        // deletes, but, if new deletes were flushed since
                        // the merge started, we must now carefully keep any
                        // newly flushed deletes but mapping them to the new
                        // docIDs.

                        // Since we copy-on-write, if any new deletes were
                        // applied after merging has started, we can just
                        // check if the before/after liveDocs have changed.
                        // If so, we must carefully merge the liveDocs one
                        // doc at a time:
                        if (currentLiveDocs != prevLiveDocs)
                        {
                            // this means this segment received new deletes
                            // since we started the merge, so we
                            // must merge them:
                            for (int j = 0; j < docCount; j++)
                            {
                                if (!prevLiveDocs.Get(j))
                                {
                                    Debug.Assert(!currentLiveDocs.Get(j));
                                }
                                else
                                {
                                    if (!currentLiveDocs.Get(j))
                                    {
                                        if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                                        {
                                            holder.Init(readerPool, merge, mergeState, true);
                                        }
                                        holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                                        if (mergingFields != null) // advance all iters beyond the deleted document
                                        {
                                            SkipDeletedDoc(updatesIters, j);
                                        }
                                    }
                                    else if (mergingFields != null)
                                    {
                                        MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                                    }
                                    docUpto++;
                                }
                            }
                        }
                        else if (mergingFields != null)
                        {
                            // need to check each non-deleted document if it has any updates
                            for (int j = 0; j < docCount; j++)
                            {
                                if (prevLiveDocs.Get(j))
                                {
                                    // document isn't deleted, check if any of the fields have an update to it
                                    MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                                    // advance docUpto for every non-deleted document
                                    docUpto++;
                                }
                                else
                                {
                                    // advance all iters beyond the deleted document
                                    SkipDeletedDoc(updatesIters, j);
                                }
                            }
                        }
                        else
                        {
                            docUpto += info.Info.DocCount - info.DelCount - rld.PendingDeleteCount;
                        }
                    }
                    else if (currentLiveDocs != null)
                    {
                        Debug.Assert(currentLiveDocs.Length() == docCount);
                        // this segment had no deletes before but now it
                        // does:
                        for (int j = 0; j < docCount; j++)
                        {
                            if (!currentLiveDocs.Get(j))
                            {
                                if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                                {
                                    holder.Init(readerPool, merge, mergeState, true);
                                }
                                holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                                if (mergingFields != null) // advance all iters beyond the deleted document
                                {
                                    SkipDeletedDoc(updatesIters, j);
                                }
                            }
                            else if (mergingFields != null)
                            {
                                MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            }
                            docUpto++;
                        }
                    }
                    else if (mergingFields != null)
                    {
                        // no deletions before or after, but there were updates
                        for (int j = 0; j < docCount; j++)
                        {
                            MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            // advance docUpto for every non-deleted document
                            docUpto++;
                        }
                    }
                    else
                    {
                        // No deletes or updates before or after
                        docUpto += info.Info.DocCount;
                    }
                }

                Debug.Assert(docUpto == merge.Info_Renamed.Info.DocCount);

                if (mergedDVUpdates.Any())
                {
                    //      System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates);
                    bool success = false;
                    try
                    {
                        // if any error occurs while writing the field updates we should release
                        // the info, otherwise it stays in the pool but is considered not "live"
                        // which later causes false exceptions in pool.dropAll().
                        // NOTE: currently this is the only place which throws a true
                        // IOException. If this ever changes, we need to extend that try/finally
                        // block to the rest of the method too.
                        holder.MergedDeletesAndUpdates_Renamed.WriteFieldUpdates(directory, mergedDVUpdates);
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            holder.MergedDeletesAndUpdates_Renamed.DropChanges();
                            readerPool.Drop(merge.Info_Renamed);
                        }
                    }
                }

                if (infoStream.IsEnabled("IW"))
                {
                    if (holder.MergedDeletesAndUpdates_Renamed == null)
                    {
                        infoStream.Message("IW", "no new deletes or field updates since merge started");
                    }
                    else
                    {
                        string msg = holder.MergedDeletesAndUpdates_Renamed.PendingDeleteCount + " new deletes";
                        if (mergedDVUpdates.Any())
                        {
                            msg += " and " + mergedDVUpdates.Size() + " new field updates";
                        }
                        msg += " since merge started";
                        infoStream.Message("IW", msg);
                    }
                }

                merge.Info_Renamed.BufferedDeletesGen = minGen;

                return holder.MergedDeletesAndUpdates_Renamed;
            }
        }
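The carry-over logic above boils down to diffing the live-docs snapshot taken when the merge started against the current live docs, and renumbering the surviving documents into the merged segment. The standalone sketch below is a hypothetical helper using plain boolean arrays instead of Lucene's Bits; it only illustrates the mapping idea and is not the IndexWriter code itself.

using System.Collections.Generic;

internal static class CarryOverDeletesSketch
{
    // Returns the merged-segment doc IDs that must be deleted because the
    // corresponding source docs were deleted *after* the merge started.
    internal static IList<int> NewDeletesSinceMergeStart(bool[] prevLiveDocs, bool[] currentLiveDocs, int docBase)
    {
        var newDeletes = new List<int>();
        int mergedDoc = docBase; // next slot in the merged segment for this source segment
        for (int doc = 0; doc < prevLiveDocs.Length; doc++)
        {
            if (!prevLiveDocs[doc])
            {
                continue; // already deleted when the merge started; the merge dropped it
            }
            if (!currentLiveDocs[doc])
            {
                newDeletes.Add(mergedDoc); // deleted after the merge started: carry it over
            }
            mergedDoc++; // every doc live at merge start occupies a slot in the merged segment
        }
        return newDeletes;
    }
}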
Example #24
0
 private void MaybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, MergedDeletesAndUpdates holder, string[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc)
 {
     int newDoc = -1;
     for (int idx = 0; idx < mergingFields.Length; idx++)
     {
         DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx];
         if (updatesIter.Doc() == curDoc) // document has an update
         {
             if (holder.MergedDeletesAndUpdates_Renamed == null)
             {
                 holder.Init(readerPool, merge, mergeState, false);
             }
             if (newDoc == -1) // map once per all field updates, but only if there are any updates
             {
                 newDoc = holder.DocMap.Map(docUpto);
             }
             DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx];
             dvUpdates.Add(newDoc, updatesIter.Value());
             updatesIter.NextDoc(); // advance to next document
         }
         else
         {
             Debug.Assert(updatesIter.Doc() > curDoc, "field=" + mergingFields[idx] + " updateDoc=" + updatesIter.Doc() + " curDoc=" + curDoc);
         }
     }
 }
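The holder.DocMap used above translates a pre-merge document number into its position in the merged segment. As a rough illustration of that idea only (a hypothetical helper, not Lucene's DocMap implementation), such a mapping can be precomputed by packing the surviving documents:

using System;

internal sealed class SimpleDocMapSketch
{
    private readonly int[] newDocIds; // -1 for deleted docs

    internal SimpleDocMapSketch(bool[] liveDocs)
    {
        newDocIds = new int[liveDocs.Length];
        int next = 0;
        for (int doc = 0; doc < liveDocs.Length; doc++)
        {
            // surviving docs are packed together, so the new ID is just a running count
            newDocIds[doc] = liveDocs[doc] ? next++ : -1;
        }
    }

    internal int Map(int oldDoc)
    {
        int mapped = newDocIds[oldDoc];
        if (mapped == -1)
        {
            throw new ArgumentException("document " + oldDoc + " was deleted and has no mapping");
        }
        return mapped;
    }
}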
Example #25
0
 internal void Init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, bool initWritableLiveDocs)
 {
     if (MergedDeletesAndUpdates_Renamed == null)
     {
         MergedDeletesAndUpdates_Renamed = readerPool.Get(merge.Info_Renamed, true);
         DocMap = merge.GetDocMap(mergeState);
         Debug.Assert(DocMap.IsConsistent(merge.Info_Renamed.Info.DocCount));
     }
     if (initWritableLiveDocs && !InitializedWritableLiveDocs)
     {
         MergedDeletesAndUpdates_Renamed.InitWritableLiveDocs();
         this.InitializedWritableLiveDocs = true;
     }
 }
Example #26
0
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields")));

            dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType();

            ft.IsIndexed = false;
            ft.IsStored  = true;
            ft.Freeze();
            int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20);
            var value       = new byte[valueLength];

            for (int i = 0; i < valueLength; ++i)
            {
                // random so that even compressing codecs can't compress it
                value[i] = (byte)Random().Next(256);
            }
            Field f = new Field("fld", value, ft);

            doc.Add(f);

            int numDocs = (int)((1L << 32) / valueLength + 100);

            for (int i = 0; i < numDocs; ++i)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % (numDocs / 10) == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            if (VERBOSE)
            {
                bool found = false;
                foreach (string file in dir.ListAll())
                {
                    if (file.EndsWith(".fdt", StringComparison.Ordinal))
                    {
                        long fileLength = dir.FileLength(file);
                        if (fileLength >= 1L << 32)
                        {
                            found = true;
                        }
                        Console.WriteLine("File length of " + file + " : " + fileLength);
                    }
                }
                if (!found)
                {
                    Console.WriteLine("No .fdt file larger than 4GB, test bug?");
                }
            }

            DirectoryReader rd = DirectoryReader.Open(dir);
            Document        sd = rd.Document(numDocs - 1);

            Assert.IsNotNull(sd);
            Assert.AreEqual(1, sd.Fields.Count);
            BytesRef valueRef = sd.GetBinaryValue("fld");

            Assert.IsNotNull(valueRef);
            Assert.AreEqual(new BytesRef(value), valueRef);
            rd.Dispose();

            dir.Dispose();
        }
Example #27
0
        /// <summary>
        /// Merges the indicated segments, replacing them in the stack with a
        /// single segment.
        ///
        /// @lucene.experimental
        /// </summary>
        public virtual void Merge(MergePolicy.OneMerge merge)
        {
            bool success = false;

            long t0 = Environment.TickCount; // elapsed-time baseline (DateTime.Now.Millisecond only yields the 0-999 ms component)

            try
            {
                try
                {
                    try
                    {
                        MergeInit(merge);
                        //if (merge.info != null) {
                        //System.out.println("MERGE: " + merge.info.info.name);
                        //}

                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "now merge\n  merge=" + SegString(merge.Segments) + "\n  index=" + SegString());
                        }

                        MergeMiddle(merge);
                        MergeSuccess(merge);
                        success = true;
                    }
                    catch (Exception t)
                    {
                        HandleMergeException(t, merge);
                    }
                }
                finally
                {
                    lock (this)
                    {
                        MergeFinish(merge);

                        if (!success)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "hit exception during merge");
                            }
                            if (merge.Info_Renamed != null && !segmentInfos.Contains(merge.Info_Renamed))
                            {
                                Deleter.Refresh(merge.Info_Renamed.Info.Name);
                            }
                        }

                        // this merge (and, generally, any change to the
                        // segments) may now enable new merges, so we call
                        // merge policy & update pending merges.
                        if (success && !merge.Aborted && (merge.MaxNumSegments != -1 || (!closed && !Closing)))
                        {
                            UpdatePendingMerges(MergeTrigger.MERGE_FINISHED, merge.MaxNumSegments);
                        }
                    }
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "merge");
            }
            if (merge.Info_Renamed != null && !merge.Aborted)
            {
                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "merge time " + (DateTime.Now.Millisecond - t0) + " msec for " + merge.Info_Renamed.Info.DocCount + " docs");
                }
            }
        }
		/// <summary>Create and return a new MergeThread </summary>
		protected internal virtual MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge)
		{
			lock (this)
			{
				MergeThread thread = new MergeThread(this, writer, merge);
				thread.SetThreadPriority(mergeThreadPriority);
				thread.IsBackground = true;
				thread.Name = "Lucene Merge Thread #" + mergeThreadCount++;
				return thread;
			}
		}
Example #29
0
 /// <summary>
 /// Hook that's called when the specified merge is complete. </summary>
 internal virtual void MergeSuccess(MergePolicy.OneMerge merge)
 {
 }
			public virtual void  SetRunningMerge(MergePolicy.OneMerge merge)
			{
				lock (this)
				{
					runningMerge = merge;
				}
			}
Example #31
0
        /// <summary>
        /// Checks whether this merge involves any segments
        ///  already participating in a merge.  If not, this merge
        ///  is "registered", meaning we record that its segments
        ///  are now participating in a merge, and true is
        ///  returned.  Else (the merge conflicts) false is
        ///  returned.
        /// </summary>
        internal bool RegisterMerge(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                if (merge.RegisterDone)
                {
                    return true;
                }
                Debug.Assert(merge.Segments.Count > 0);

                if (StopMerges)
                {
                    merge.Abort();
                    throw new MergePolicy.MergeAbortedException("merge is aborted: " + SegString(merge.Segments));
                }

                bool isExternal = false;
                foreach (SegmentCommitInfo info in merge.Segments)
                {
                    if (mergingSegments.Contains(info))
                    {
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "reject merge " + SegString(merge.Segments) + ": segment " + SegString(info) + " is already marked for merge");
                        }
                        return false;
                    }
                    if (!segmentInfos.Contains(info))
                    {
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "reject merge " + SegString(merge.Segments) + ": segment " + SegString(info) + " does not exist in live infos");
                        }
                        return false;
                    }
                    if (info.Info.Dir != directory)
                    {
                        isExternal = true;
                    }
                    if (SegmentsToMerge.ContainsKey(info))
                    {
                        merge.MaxNumSegments = MergeMaxNumSegments;
                    }
                }

                EnsureValidMerge(merge);

                PendingMerges.AddLast(merge);

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "add merge to pendingMerges: " + SegString(merge.Segments) + " [total " + PendingMerges.Count + " pending]");
                }

                merge.MergeGen = MergeGen;
                merge.IsExternal = isExternal;

                // OK, it does not conflict; now record that this merge is
                // running (while synchronized) to avoid the race condition
                // where two conflicting merges from different threads start
                // at the same time
                if (infoStream.IsEnabled("IW"))
                {
                    StringBuilder builder = new StringBuilder("registerMerge merging= [");
                    foreach (SegmentCommitInfo info in mergingSegments)
                    {
                        builder.Append(info.Info.Name).Append(", ");
                    }
                    builder.Append("]");
                    // don't call mergingSegments.ToString() here: it could lead to a
                    // ConcurrentModificationException since merge updates the segment's FieldInfos
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", builder.ToString());
                    }
                }
                foreach (SegmentCommitInfo info in merge.Segments)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "registerMerge info=" + SegString(info));
                    }
                    mergingSegments.Add(info);
                }

                Debug.Assert(merge.EstimatedMergeBytes == 0);
                Debug.Assert(merge.TotalMergeBytes == 0);
                foreach (SegmentCommitInfo info in merge.Segments)
                {
                    if (info.Info.DocCount > 0)
                    {
                        int delCount = NumDeletedDocs(info);
                        Debug.Assert(delCount <= info.Info.DocCount);
                        double delRatio = ((double)delCount) / info.Info.DocCount;
                        merge.EstimatedMergeBytes += (long)(info.SizeInBytes() * (1.0 - delRatio));
                        merge.TotalMergeBytes += info.SizeInBytes();
                    }
                }

                // Merge is now registered
                merge.RegisterDone = true;

                return true;
            }
        }
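Stripped of infoStream logging and size accounting, the registration step above is a synchronized membership check against the set of segments already being merged. A simplified sketch of that bookkeeping, tracking segment names in a plain HashSet rather than IndexWriter's internal state, might look like this:

using System.Collections.Generic;

internal sealed class MergeRegistrySketch
{
    private readonly HashSet<string> mergingSegmentNames = new HashSet<string>();
    private readonly object syncLock = new object();

    // Returns false if any candidate segment is already part of a running merge,
    // mirroring the "reject merge" branches above.
    internal bool TryRegister(IEnumerable<string> candidateSegmentNames)
    {
        lock (syncLock)
        {
            var candidates = new List<string>(candidateSegmentNames);
            foreach (string name in candidates)
            {
                if (mergingSegmentNames.Contains(name))
                {
                    return false; // conflict: segment already participating in a merge
                }
            }
            foreach (string name in candidates)
            {
                mergingSegmentNames.Add(name); // record participation while still holding the lock
            }
            return true;
        }
    }

    internal void Unregister(IEnumerable<string> segmentNames)
    {
        lock (syncLock)
        {
            foreach (string name in segmentNames)
            {
                mergingSegmentNames.Remove(name);
            }
        }
    }
}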
Example #32
0
 /// <summary>
 /// Expert: <seealso cref="MergePolicy"/> is invoked whenever there are changes to the
 /// segments in the index. Its role is to select which merges to do, if any,
 /// and return a <seealso cref="MergePolicy.MergeSpecification"/> describing the merges.
 /// It also selects merges to do for forceMerge.
 ///
 /// <p>Only takes effect when IndexWriter is first created.
 /// </summary>
 public IndexWriterConfig SetMergePolicy(MergePolicy mergePolicy)
 {
     if (mergePolicy == null)
     {
         throw new System.ArgumentException("mergePolicy must not be null");
     }
     this.mergePolicy = mergePolicy;
     return this;
 }
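For reference, wiring a merge policy into a writer typically goes through IndexWriterConfig as above. The following is a hedged sketch assuming the Lucene.NET 4.8 API surface (LuceneVersion, StandardAnalyzer, FSDirectory); the LogByteSizeMergePolicy, the 512 MB cap and the index path are arbitrary placeholders:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

internal static class MergePolicyConfigSketch
{
    internal static IndexWriter OpenWriter(string indexPath)
    {
        var policy = new LogByteSizeMergePolicy { MaxMergeMB = 512.0 }; // cap individual merges at ~512 MB
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))
            .SetMergePolicy(policy);
        // The writer selects merges according to 'policy' as documents are added.
        return new IndexWriter(FSDirectory.Open(indexPath), config);
    }
}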
Example #33
0
 /// <summary>
 /// Does initial setup for a merge, which is fast but holds
 ///  the synchronized lock on IndexWriter instance.
 /// </summary>
 internal void MergeInit(MergePolicy.OneMerge merge)
 {
     lock (this)
     {
         bool success = false;
         try
         {
             _mergeInit(merge);
             success = true;
         }
         finally
         {
             if (!success)
             {
                 if (infoStream.IsEnabled("IW"))
                 {
                     infoStream.Message("IW", "hit exception in mergeInit");
                 }
                 MergeFinish(merge);
             }
         }
     }
 }
Example #34
0
        public virtual void TestKeepAllDeletionPolicy()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: cycle pass="******"TEST: open writer for forceMerge");
                    }
                    writer = new IndexWriter(dir, conf);
                    policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    writer.ForceMerge(1);
                    writer.Dispose();
                }

                Assert.AreEqual(needsMerging ? 2 : 1, policy.NumOnInit);

                // If we are not auto committing then there should
                // be exactly 2 commits (one per close above):
                Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.NumOnCommit);

                // Test listCommits
                ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir);
                // 2 from closing writer
                Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count);

                // Make sure we can open a reader on each commit:
                foreach (IndexCommit commit in commits)
                {
                    IndexReader r = DirectoryReader.Open(commit);
                    r.Dispose();
                }

                // Simplistic check: just verify all segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetLastCommitGeneration(dir);
                while (gen > 0)
                {
                    IndexReader reader = DirectoryReader.Open(dir);
                    reader.Dispose();
                    dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    gen--;

                    if (gen > 0)
                    {
                        // Now that we've removed a commit point, which
                        // should have orphan'd at least one index file.
                        // Open & close a writer and assert that it
                        // actually removed something:
                        int preCount = dir.ListAll().Length;
                        writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy));
                        writer.Dispose();
                        int postCount = dir.ListAll().Length;
                        Assert.IsTrue(postCount < preCount);
                    }
                }

                dir.Dispose();
            }
        }
Example #35
0
        private void _mergeInit(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                Debug.Assert(TestPoint("startMergeInit"));

                Debug.Assert(merge.RegisterDone);
                Debug.Assert(merge.MaxNumSegments == -1 || merge.MaxNumSegments > 0);

                if (HitOOM)
                {
                    throw new InvalidOperationException("this writer hit an OutOfMemoryError; cannot merge");
                }

                if (merge.Info_Renamed != null)
                {
                    // mergeInit already done
                    return;
                }

                if (merge.Aborted)
                {
                    return;
                }

                // TODO: in the non-pool'd case this is somewhat
                // wasteful, because we open these readers, close them,
                // and then open them again for merging.  Maybe  we
                // could pre-pool them somehow in that case...

                // Lock order: IW -> BD
                BufferedUpdatesStream.ApplyDeletesResult result = BufferedUpdatesStream.ApplyDeletesAndUpdates(readerPool, merge.Segments);

                if (result.AnyDeletes)
                {
                    Checkpoint();
                }

                if (!KeepFullyDeletedSegments_Renamed && result.AllDeleted != null)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "drop 100% deleted segments: " + result.AllDeleted);
                    }
                    foreach (SegmentCommitInfo info in result.AllDeleted)
                    {
                        segmentInfos.Remove(info);
                        if (merge.Segments.Contains(info))
                        {
                            mergingSegments.Remove(info);
                            merge.Segments.Remove(info);
                        }
                        readerPool.Drop(info);
                    }
                    Checkpoint();
                }

                // Bind a new segment name here so even with
                // ConcurrentMergePolicy we keep deterministic segment
                // names.
                string mergeSegmentName = NewSegmentName();
                SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, Codec, null);
                IDictionary<string, string> details = new Dictionary<string, string>();
                details["mergeMaxNumSegments"] = "" + merge.MaxNumSegments;
                details["mergeFactor"] = Convert.ToString(merge.Segments.Count);
                SetDiagnostics(si, SOURCE_MERGE, details);
                merge.Info = new SegmentCommitInfo(si, 0, -1L, -1L);

                //    System.out.println("[" + Thread.currentThread().getName() + "] IW._mergeInit: " + segString(merge.segments) + " into " + si);

                // Lock order: IW -> BD
                BufferedUpdatesStream.Prune(segmentInfos);

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "merge seg=" + merge.Info_Renamed.Info.Name + " " + SegString(merge.Segments));
                }
            }
        }
Example #36
0
        public virtual void TestKeepLastNDeletionPolicy()
        {
            const int N = 5;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = NewDirectory();

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
                for (int j = 0; j < N + 1; j++)
                {
                    IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10);
                    MergePolicy       mp   = conf.MergePolicy;
                    mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                    IndexWriter writer = new IndexWriter(dir, conf);
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    for (int i = 0; i < 17; i++)
                    {
                        AddDoc(writer);
                    }
                    writer.ForceMerge(1);
                    writer.Dispose();
                }

                Assert.IsTrue(policy.NumDelete > 0);
                Assert.AreEqual(N + 1, policy.NumOnInit);
                Assert.AreEqual(N + 1, policy.NumOnCommit);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetLastCommitGeneration(dir);
                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = DirectoryReader.Open(dir);
                        reader.Dispose();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits prior to last " + N);
                        }
                    }
                    catch (IOException /*e*/)
                    {
                        if (i != N)
                        {
                            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Dispose();
            }
        }
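The KeepLastNDeletionPolicy exercised here is a test helper; an application-side policy with the same behavior could plausibly be written as below. This is a sketch that assumes the generic OnInit<T>/OnCommit<T> signatures of the Lucene.NET 4.8 IndexDeletionPolicy base class and that the commit list is passed oldest-first:

using System.Collections.Generic;
using Lucene.Net.Index;

// Keeps only the most recent N commit points, deleting older ones.
internal sealed class KeepLastNCommitsPolicy : IndexDeletionPolicy
{
    private readonly int numToKeep;

    internal KeepLastNCommitsPolicy(int numToKeep)
    {
        this.numToKeep = numToKeep;
    }

    public override void OnInit<T>(IList<T> commits)
    {
        OnCommit(commits); // apply the same pruning when the writer first opens
    }

    public override void OnCommit<T>(IList<T> commits)
    {
        // commits are ordered oldest-to-newest; delete everything but the last N
        int toDelete = commits.Count - numToKeep;
        for (int i = 0; i < toDelete; i++)
        {
            commits[i].Delete();
        }
    }
}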
Example #37
0
        /// <summary>
        /// Does finishing for a merge, which is fast but holds
        ///  the synchronized lock on IndexWriter instance.
        /// </summary>
        public void MergeFinish(MergePolicy.OneMerge merge)
        {
            lock (this)
            {
                // forceMerge, addIndexes or finishMerges may be waiting
                // on merges to finish.
                Monitor.PulseAll(this);

                // It's possible we are called twice, eg if there was an
                // exception inside mergeInit
                if (merge.RegisterDone)
                {
                    IList<SegmentCommitInfo> sourceSegments = merge.Segments;
                    foreach (SegmentCommitInfo info in sourceSegments)
                    {
                        mergingSegments.Remove(info);
                    }
                    merge.RegisterDone = false;
                }

                RunningMerges.Remove(merge);
            }
        }
 /// <summary>
 /// Wrap the given <seealso cref="MergePolicy"/> and intercept forceMerge requests to
 /// only upgrade segments written with previous Lucene versions.
 /// </summary>
 public UpgradeIndexMergePolicy(MergePolicy @base)
 {
     this.@base = @base;
 }
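A plausible way to use this wrapper (a sketch, not taken from the surrounding code) is to decorate whatever merge policy the config already carries, so that a subsequent ForceMerge only rewrites segments written by older Lucene versions:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

internal static class UpgradeSketch
{
    internal static void UpgradeInPlace(string indexPath)
    {
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
        // Wrap the existing policy so forceMerge only touches old-format segments.
        config.SetMergePolicy(new UpgradeIndexMergePolicy(config.MergePolicy));
        using (var dir = FSDirectory.Open(indexPath))
        using (var writer = new IndexWriter(dir, config))
        {
            writer.ForceMerge(1);
        }
    }
}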
Example #39
0
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(scheduler)
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field         field       = new Field("field", tokenStream, ft);

            doc.Add(field);

            const int numDocs = 1000;

            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);

            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader          mr   = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));

            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);

            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));

            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
Example #40
0
			public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
			{
				this.merge = merge;
				this.dir = dir;
			}
 protected override void DoMerge(MergePolicy.OneMerge merge)
 {
     TotMergedBytes += merge.TotalBytesSize();
     base.DoMerge(merge);
 }
Example #42
0
        private void CloseMergeReaders(MergePolicy.OneMerge merge, bool suppressExceptions)
        {
            lock (this)
            {
                int numSegments = merge.Readers.Count;
                Exception th = null;

                bool drop = !suppressExceptions;

                for (int i = 0; i < numSegments; i++)
                {
                    SegmentReader sr = merge.Readers[i];
                    if (sr != null)
                    {
                        try
                        {
                            ReadersAndUpdates rld = readerPool.Get(sr.SegmentInfo, false);
                            // We still hold a ref so it should not have been removed:
                            Debug.Assert(rld != null);
                            if (drop)
                            {
                                rld.DropChanges();
                            }
                            else
                            {
                                rld.DropMergingUpdates();
                            }
                            rld.Release(sr);
                            readerPool.Release(rld);
                            if (drop)
                            {
                                readerPool.Drop(rld.Info);
                            }
                        }
                        catch (Exception t)
                        {
                            if (th == null)
                            {
                                th = t;
                            }
                        }
                        merge.Readers[i] = null;
                    }
                }

                // If any error occurred, throw it.
                if (!suppressExceptions)
                {
                    IOUtils.ReThrow(th);
                }
            }
        }
Example #43
0
        private static void ConfigureRandom(Random r, MergePolicy mergePolicy)
        {
            if (r.NextBoolean())
            {
                mergePolicy.NoCFSRatio = 0.1 + r.NextDouble() * 0.8;
            }
            else
            {
                mergePolicy.NoCFSRatio = r.NextBoolean() ? 1.0 : 0.0;
            }

            if (Rarely())
            {
                mergePolicy.MaxCFSSegmentSizeMB = 0.2 + r.NextDouble() * 2.0;
            }
            else
            {
                mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            }
        }
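ConfigureRandom merely randomizes the two compound-file knobs that every MergePolicy exposes; in application code the same properties can be set deterministically. A short sketch with arbitrary example values:

using Lucene.Net.Index;

internal static class CompoundFileSettings
{
    // Favor compound files for small segments but never build a CFS above 512 MB.
    internal static MergePolicy Configure(MergePolicy mergePolicy)
    {
        mergePolicy.NoCFSRatio = 0.5;            // build a CFS only if the merged segment is <= 50% of the index
        mergePolicy.MaxCFSSegmentSizeMB = 512.0; // and only if it stays under 512 MB
        return mergePolicy;
    }
}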
            protected override void DoMerge(MergePolicy.OneMerge merge)
            {
                try
                {
                    // Stall all incoming merges until we see
                    // maxMergeCount:
                    int count = RunningMergeCount.IncrementAndGet();
                    try
                    {
                        Assert.IsTrue(count <= MaxMergeCount, "count=" + count + " vs maxMergeCount=" + MaxMergeCount);
                        EnoughMergesWaiting.Signal();

                        // Stall this merge until we see exactly
                        // maxMergeCount merges waiting
                        while (true)
                        {
                            // wait for 10 milliseconds
                            if (EnoughMergesWaiting.Wait(new TimeSpan(0, 0, 0, 0, 10)) || Failed.Get())
                            {
                                break;
                            }
                        }
                        // Then sleep a bit to give a chance for the bug
                        // (too many pending merges) to appear:
                        Thread.Sleep(20);
                        base.DoMerge(merge);
                    }
                    finally
                    {
                        RunningMergeCount.DecrementAndGet();
                    }
                }
                catch (Exception t)
                {
                    Failed.Set(true);
                    Writer.MergeFinish(merge);
                    throw new Exception(t.Message, t);
                }
            }
Example #45
0
        /// <summary>
        /// Does the actual (time-consuming) work of the merge,
        ///  but without holding synchronized lock on IndexWriter
        ///  instance
        /// </summary>
        private int MergeMiddle(MergePolicy.OneMerge merge)
        {
            merge.CheckAborted(directory);

            string mergedName = merge.Info_Renamed.Info.Name;

            IList<SegmentCommitInfo> sourceSegments = merge.Segments;

            IOContext context = new IOContext(merge.MergeInfo);

            MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory);
            TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);

            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "merging " + SegString(merge.Segments));
            }

            merge.Readers = new List<SegmentReader>();

            // this is try/finally to make sure merger's readers are
            // closed:
            bool success = false;
            try
            {
                int segUpto = 0;
                while (segUpto < sourceSegments.Count)
                {
                    SegmentCommitInfo info = sourceSegments[segUpto];

                    // Hold onto the "live" reader; we will use this to
                    // commit merged deletes
                    ReadersAndUpdates rld = readerPool.Get(info, true);

                    // Carefully pull the most recent live docs and reader
                    SegmentReader reader;
                    Bits liveDocs;
                    int delCount;

                    lock (this)
                    {
                        // Must sync to ensure BufferedDeletesStream cannot change liveDocs,
                        // pendingDeleteCount and field updates while we pull a copy:
                        reader = rld.GetReaderForMerge(context);
                        liveDocs = rld.ReadOnlyLiveDocs;
                        delCount = rld.PendingDeleteCount + info.DelCount;

                        Debug.Assert(reader != null);
                        Debug.Assert(rld.VerifyDocCounts());

                        if (infoStream.IsEnabled("IW"))
                        {
                            if (rld.PendingDeleteCount != 0)
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount);
                            }
                            else if (info.DelCount != 0)
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount);
                            }
                            else
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " no deletes");
                            }
                        }
                    }

                    // Deletes might have happened after we pulled the merge reader and
                    // before we got a read-only copy of the segment's actual live docs
                    // (taking pending deletes into account). In that case we need to
                    // make a new reader with updated live docs and del count.
                    if (reader.NumDeletedDocs() != delCount)
                    {
                        // fix the reader's live docs and del count
                        Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies

                        SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount);
                        bool released = false;
                        try
                        {
                            rld.Release(reader);
                            released = true;
                        }
                        finally
                        {
                            if (!released)
                            {
                                newReader.DecRef();
                            }
                        }

                        reader = newReader;
                    }

                    merge.Readers.Add(reader);
                    Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount);
                    segUpto++;
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());

                // we pass merge.getMergeReaders() instead of merge.readers to allow the
                // OneMerge to return a view over the actual segments to merge
                SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

                merge.CheckAborted(directory);

                // this is where all the work happens:
                MergeState mergeState;
                bool success3 = false;
                try
                {
                    if (!merger.ShouldMerge())
                    {
                        // would result in a 0 document segment: nothing to merge!
                        mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort);
                    }
                    else
                    {
                        mergeState = merger.Merge();
                    }
                    success3 = true;
                }
                finally
                {
                    if (!success3)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(merge.Info_Renamed.Info.Name);
                        }
                    }
                }
                Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info);
                merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles);

                // Record which codec was used to write the segment

                if (infoStream.IsEnabled("IW"))
                {
                    if (merge.Info_Renamed.Info.DocCount == 0)
                    {
                        infoStream.Message("IW", "merge away fully deleted segments");
                    }
                    else
                    {
                        infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs"));
                    }
                }

                // Very important to do this before opening the reader
                // because codec must know if prox was written for
                // this segment:
                //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
                bool useCompoundFile;
                lock (this) // Guard segmentInfos
                {
                    useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed);
                }

                if (useCompoundFile)
                {
                    success = false;

                    ICollection<string> filesToRemove = merge.Info_Renamed.Files();

                    try
                    {
                        filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context);
                        success = true;
                    }
                    catch (System.IO.IOException ioe)
                    {
                        lock (this)
                        {
                            if (merge.Aborted)
                            {
                                // this can happen if rollback or close(false)
                                // is called -- fall through to logic below to
                                // remove the partially created CFS:
                            }
                            else
                            {
                                HandleMergeException(ioe, merge);
                            }
                        }
                    }
                    catch (Exception t)
                    {
                        HandleMergeException(t, merge);
                    }
                    finally
                    {
                        if (!success)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "hit exception creating compound file during merge");
                            }

                            lock (this)
                            {
                                Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                                Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                                Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                            }
                        }
                    }

                    // So that, if we hit exc in deleteNewFiles (next)
                    // or in commitMerge (later), we close the
                    // per-segment readers in the finally clause below:
                    success = false;

                    lock (this)
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        Deleter.DeleteNewFiles(filesToRemove);

                        if (merge.Aborted)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "abort merge after building CFS");
                            }
                            Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                            Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                            return 0;
                        }
                    }

                    merge.Info_Renamed.Info.UseCompoundFile = true;
                }
                else
                {
                    // So that, if we hit exc in commitMerge (later),
                    // we close the per-segment readers in the finally
                    // clause below:
                    success = false;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                bool success2 = false;
                try
                {
                    Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context);
                    success2 = true;
                }
                finally
                {
                    if (!success2)
                    {
                        lock (this)
                        {
                            Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                        }
                    }
                }

                // TODO: ideally we would freeze merge.info here!!
                // because any changes after writing the .si will be
                // lost...

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0));
                }

                IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer;
                if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0)
                {
                    ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true);
                    SegmentReader sr = rld.GetReader(IOContext.READ);
                    try
                    {
                        mergedSegmentWarmer.Warm(sr);
                    }
                    finally
                    {
                        lock (this)
                        {
                            rld.Release(sr);
                            readerPool.Release(rld);
                        }
                    }
                }

                // Force READ context because we merge deletes onto
                // this reader:
                if (!CommitMerge(merge, mergeState))
                {
                    // commitMerge will return false if this merge was
                    // aborted
                    return 0;
                }

                success = true;
            }
            finally
            {
                // Readers are already closed in commitMerge if we didn't hit
                // an exc:
                if (!success)
                {
                    CloseMergeReaders(merge, true);
                }
            }

            return merge.Info_Renamed.Info.DocCount;
        }
Example #46
0
        public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            if ("Lucene3x".Equals(Codec.Default.Name))
            {
                throw new Exception("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

            IList <BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            dir.CheckIndexOnClose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(scheduler)
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random(), TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = Environment.TickCount;
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
                }
                savedTerms = ts.SavedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms == null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms            = savedTerms.Count;
            IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
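Distilled from the test above, this hedged sketch shows the writer settings it leans on for very large bulk indexing: auto-flush by document count disabled, a large RAM buffer, and LogByteSizeMergePolicy.MaxMergeMB raised so oversized merges are not skipped. The directory path and analyzer are placeholder choices; the test itself uses the randomized test-framework equivalents.

        // Hedged sketch of the bulk-indexing configuration pattern used above.
        Directory dir = FSDirectory.Open(new DirectoryInfo("/path/to/big/index")); // placeholder path
        var conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))
                   .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) // flush by RAM usage, not doc count
                   .SetRAMBufferSizeMB(256.0);

        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.MaxMergeMB = 1024 * 1024 * 1024; // effectively unlimited ("1 petabyte"), as in the test
        conf.SetMergePolicy(mp);

        using (IndexWriter w = new IndexWriter(dir, conf))
        {
            // ... AddDocument calls ...
            w.ForceMerge(1); // single-segment index, as the test does before reading back
        }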
Example #47
0
 internal virtual void AddMergeException(MergePolicy.OneMerge merge)
 {
     lock (this)
     {
         Debug.Assert(merge.Exception != null);
         if (!MergeExceptions.Contains(merge) && MergeGen == merge.MergeGen)
         {
             MergeExceptions.Add(merge);
         }
     }
 }
Example #48
0
        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random        random  = new Random(Random().Next());
            LineFileDocs  docs    = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);

            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
            TaskScheduler es = null;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet <string>   delIDs     = new ConcurrentHashSet <string>(new HashSet <string>());
            ISet <string>   delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>());
            IList <SubDocs> allSubDocs = new SynchronizedCollection <SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;

            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs);
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
Example #49
0
        internal readonly Codec Codec; // for writing new segments

        /// <summary>
        /// Constructs a new IndexWriter per the settings given in <code>conf</code>.
        /// If you want to make "live" changes to this writer instance, use
        /// <seealso cref="#getConfig()"/>.
        ///
        /// <p>
        /// <b>NOTE:</b> after this writer is created, the given configuration instance
        /// cannot be passed to another writer. If you intend to do so, you should
        /// <seealso cref="IndexWriterConfig#clone() clone"/> it beforehand. (A brief
        /// usage sketch follows the constructor body below.)
        /// </summary>
        /// <param name="d">
        ///          the index directory. The index is either created or appended to,
        ///          according to <code>conf.getOpenMode()</code>. </param>
        /// <param name="conf">
        ///          the configuration settings according to which IndexWriter should
        ///          be initialized. </param>
        /// <exception cref="IOException">
        ///           if the directory cannot be read/written to, or if it does not
        ///           exist and <code>conf.getOpenMode()</code> is
        ///           <code>OpenMode.APPEND</code> or if there is any other low-level
        ///           IO error </exception>
        public IndexWriter(Directory d, IndexWriterConfig conf)
        {
            /*if (!InstanceFieldsInitialized)
            {
                InitializeInstanceFields();
                InstanceFieldsInitialized = true;
            }*/
            readerPool = new ReaderPool(this);
            conf.SetIndexWriter(this); // prevent reuse by other instances
            Config_Renamed = new LiveIndexWriterConfig(conf);
            directory = d;
            analyzer = Config_Renamed.Analyzer;
            infoStream = Config_Renamed.InfoStream;
            mergePolicy = Config_Renamed.MergePolicy;
            mergePolicy.IndexWriter = this;
            mergeScheduler = Config_Renamed.MergeScheduler;
            Codec = Config_Renamed.Codec;

            BufferedUpdatesStream = new BufferedUpdatesStream(infoStream);
            PoolReaders = Config_Renamed.ReaderPooling;

            WriteLock = directory.MakeLock(WRITE_LOCK_NAME);

            if (!WriteLock.Obtain(Config_Renamed.WriteLockTimeout)) // obtain write lock
            {
                throw new LockObtainFailedException("Index locked for write: " + WriteLock);
            }

            bool success = false;
            try
            {
                OpenMode_e? mode = Config_Renamed.OpenMode;
                bool create;
                if (mode == OpenMode_e.CREATE)
                {
                    create = true;
                }
                else if (mode == OpenMode_e.APPEND)
                {
                    create = false;
                }
                else
                {
                    // CREATE_OR_APPEND - create only if an index does not exist
                    create = !DirectoryReader.IndexExists(directory);
                }

                // If index is too old, reading the segments will throw
                // IndexFormatTooOldException.
                segmentInfos = new SegmentInfos();

                bool initialIndexExists = true;

                if (create)
                {
                    // Try to read first.  this is to allow create
                    // against an index that's currently open for
                    // searching.  In this case we write the next
                    // segments_N file with no segments:
                    try
                    {
                        segmentInfos.Read(directory);
                        segmentInfos.Clear();
                    }
                    catch (IOException)
                    {
                        // Likely this means it's a fresh directory
                        initialIndexExists = false;
                    }

                    // Record that we have a change (zero out all
                    // segments) pending:
                    Changed();
                }
                else
                {
                    segmentInfos.Read(directory);

                    IndexCommit commit = Config_Renamed.IndexCommit;
                    if (commit != null)
                    {
                        // Swap out all segments, but, keep metadata in
                        // SegmentInfos, like version & generation, to
                        // preserve write-once.  this is important if
                        // readers are open against the future commit
                        // points.
                        if (commit.Directory != directory)
                        {
                            throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
                        }
                        SegmentInfos oldInfos = new SegmentInfos();
                        oldInfos.Read(directory, commit.SegmentsFileName);
                        segmentInfos.Replace(oldInfos);
                        Changed();
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "init: loaded commit \"" + commit.SegmentsFileName + "\"");
                        }
                    }
                }

                RollbackSegments = segmentInfos.CreateBackupSegmentInfos();

                // start with previous field numbers, but new FieldInfos
                GlobalFieldNumberMap = FieldNumberMap;
                Config_Renamed.FlushPolicy.Init(Config_Renamed);
                DocWriter = new DocumentsWriter(this, Config_Renamed, directory);
                eventQueue = DocWriter.EventQueue();

                // Default deleter (for backwards compatibility) is
                // KeepOnlyLastCommitDeleter:
                lock (this)
                {
                    Deleter = new IndexFileDeleter(directory, Config_Renamed.DelPolicy, segmentInfos, infoStream, this, initialIndexExists);
                }

                if (Deleter.StartingCommitDeleted)
                {
                    // Deletion policy deleted the "head" commit point.
                    // We have to mark ourself as changed so that if we
                    // are closed w/o any further changes we write a new
                    // segments_N file.
                    Changed();
                }

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "init: create=" + create);
                    MessageState();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "init: hit exception on init; releasing write lock");
                    }
                    WriteLock.Release();
                    IOUtils.CloseWhileHandlingException(WriteLock);
                    WriteLock = null;
                }
            }
        }
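As the constructor documentation above notes, an IndexWriterConfig instance cannot be shared between writers. A brief hedged usage sketch follows; the directory path, analyzer, and the commented clone call are placeholder assumptions.

        // Hedged sketch: open (or create) an index per conf.getOpenMode().
        Directory d = FSDirectory.Open(new DirectoryInfo("/path/to/index")); // placeholder path
        var conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))
                   .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);

        IndexWriter writer = new IndexWriter(d, conf);

        // The same conf instance cannot be passed to a second writer (see the note
        // above); clone it first, e.g.:
        //   IndexWriter other = new IndexWriter(otherDir, (IndexWriterConfig)conf.Clone());

        writer.Dispose();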
Example #50
0
        public virtual void TestDeleteLeftoverFiles()
        {
            Directory dir = NewDirectory();

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
            }

            MergePolicy mergePolicy = NewLogMergePolicy(true, 10);

            // this test expects all of its segments to be in CFS
            mergePolicy.NoCFSRatio          = 1.0;
            mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10).SetMergePolicy(mergePolicy).SetUseCompoundFile(true));

            int i;

            for (i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            writer.Config.MergePolicy.NoCFSRatio = 0.0;
            writer.Config.SetUseCompoundFile(false);
            for (; i < 45; i++)
            {
                AddDoc(writer, i);
            }
            writer.Dispose();

            // Delete one doc so we get a .del file:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).SetUseCompoundFile(true));
            Term searchTerm = new Term("id", "7");

            writer.DeleteDocuments(searchTerm);
            writer.Dispose();

            // Now, artificially create an extra .del file & extra
            // .s0 file:
            string[] files = dir.ListAll();

            /*
             * for(int j=0;j<files.Length;j++) {
             * System.out.println(j + ": " + files[j]);
             * }
             */

            // TODO: fix this test better
            string ext = Codec.Default.Name.Equals("SimpleText", StringComparison.Ordinal) ? ".liv" : ".del";

            // Create a bogus separate del file for a
            // segment that already has a separate del file:
            CopyFile(dir, "_0_1" + ext, "_0_2" + ext);

            // Create a bogus separate del file for a
            // segment that does not yet have a separate del file:
            CopyFile(dir, "_0_1" + ext, "_1_1" + ext);

            // Create a bogus separate del file for a
            // non-existent segment:
            CopyFile(dir, "_0_1" + ext, "_188_1" + ext);

            // Create a bogus segment file:
            CopyFile(dir, "_0.cfs", "_188.cfs");

            // Create a bogus fnm file when the CFS already exists:
            CopyFile(dir, "_0.cfs", "_0.fnm");

            // Create some old segments file:
            CopyFile(dir, "segments_2", "segments");
            CopyFile(dir, "segments_2", "segments_1");

            // Create a bogus cfs file shadowing a non-cfs segment:

            // TODO: assert is bogus (relies upon codec-specific filenames)
            Assert.IsTrue(SlowFileExists(dir, "_3.fdt") || SlowFileExists(dir, "_3.fld"));
            Assert.IsTrue(!SlowFileExists(dir, "_3.cfs"));
            CopyFile(dir, "_1.cfs", "_3.cfs");

            string[] filesPre = dir.ListAll();

            // Open & close a writer: it should delete the above 4
            // files and nothing more:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND));
            writer.Dispose();

            string[] files2 = dir.ListAll();
            dir.Dispose();

            Array.Sort(files);
            Array.Sort(files2);

            ISet <string> dif = DifFiles(files, files2);

            if (!Arrays.Equals(files, files2))
            {
                Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2) + "\ndiff: " + dif);
            }
        }
Example #51
0
 /// <summary>
 /// Wrap the given <see cref="MergePolicy"/> and intercept <see cref="IndexWriter.ForceMerge(int)"/> requests to
 /// only upgrade segments written with previous Lucene versions.
 /// </summary>
 public UpgradeIndexMergePolicy(MergePolicy @base)
 {
     this.m_base = @base;
 }
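A hedged sketch of how the wrapper above is typically applied: wrap whatever MergePolicy the config already carries, then request a forced merge so that only segments written by older Lucene versions are rewritten. The directory path and analyzer are placeholder assumptions.

        // Hedged sketch: upgrade old-format segments in place.
        Directory dir = FSDirectory.Open(new DirectoryInfo("/path/to/old/index")); // placeholder path
        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
        conf.SetMergePolicy(new UpgradeIndexMergePolicy(conf.MergePolicy));

        using (IndexWriter w = new IndexWriter(dir, conf))
        {
            // ForceMerge requests are intercepted so only old-version segments merge:
            w.ForceMerge(1);
        }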