Size() Public Method

Returns the number of SegmentCommitInfos.
public Size ( ) : int
Return: int
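
A minimal usage sketch, not taken from the examples below: it assumes an already-populated Lucene.NET 4.x Directory instance named dir and the usual Lucene.Net.Index / Lucene.Net.Store usings. Read the current commit with Read(), use Size() as the loop bound, and fetch each SegmentCommitInfo with Info(i).

 // Hypothetical snippet: list every segment in the latest commit.
 // `dir` is assumed to be an existing, non-empty Directory.
 SegmentInfos infos = new SegmentInfos();
 infos.Read(dir);                               // load the latest segments_N commit
 for (int i = 0; i < infos.Size(); i++)         // Size() == number of SegmentCommitInfos
 {
     SegmentCommitInfo info = infos.Info(i);
     Console.WriteLine(info.Info.Name + " bytes=" + info.SizeInBytes());
 }

The examples below show the same calls in context.
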
 protected internal override object DoBody(string segmentFileName)
 {
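      // Open the commit named by segmentFileName and create one SegmentReader per
      // segment, newest first; if any reader fails to open, close the ones already opened.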
     var sis = new SegmentInfos();
     sis.Read(directory, segmentFileName);
     var readers = new SegmentReader[sis.Size()];
     for (int i = sis.Size() - 1; i >= 0; i--)
     {
         System.IO.IOException prior = null;
         bool success = false;
         try
         {
             readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
             success = true;
         }
         catch (System.IO.IOException ex)
         {
             prior = ex;
         }
         finally
         {
             if (!success)
             {
                 IOUtils.CloseWhileHandlingException(prior, readers);
             }
         }
     }
     return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
 }
Example #3
 public virtual void ListSegments()
 {
     for (int x = 0; x < infos.Size(); x++)
     {
         SegmentCommitInfo info    = infos.Info(x);
         string            sizeStr = string.Format(CultureInfo.InvariantCulture, "{0:###,###.###}", info.SizeInBytes());
         Console.WriteLine(info.Info.Name + " " + sizeStr);
     }
 }
        public virtual void TestAllSegmentsLarge()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
Example #5
        public virtual void TestMergeFactor()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            lmp.MergeFactor  = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 4 segments in the index, because of the merge factor and
            // max merge docs settings.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(4, sis.Size());
        }
Example #6
        public virtual void TestNumDocsLimit()
        {
            // tests that the max merge docs constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the max merge docs limit
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
        private int GetNumberOfSegments(Directory dir)
        {
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            return(infos.Size());
        }
Example #8
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
        {
            MergeSpecification mergeSpec = null;
            //System.out.println("MRMP: findMerges sis=" + segmentInfos);

            int numSegments = segmentInfos.Size();

            IList <SegmentCommitInfo>       segments = new List <SegmentCommitInfo>();
            ICollection <SegmentCommitInfo> merging  = Writer.Get().MergingSegments;

            foreach (SegmentCommitInfo sipc in segmentInfos.Segments)
            {
                if (!merging.Contains(sipc))
                {
                    segments.Add(sipc);
                }
            }

            numSegments = segments.Count;

            if (numSegments > 1 && (numSegments > 30 || Random.Next(5) == 3))
            {
                segments = CollectionsHelper.Shuffle(segments);

                // TODO: sometimes make more than 1 merge?
                mergeSpec = new MergeSpecification();
                int segsToMerge = TestUtil.NextInt(Random, 1, numSegments);
                mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));
            }

            return(mergeSpec);
        }
Example #9
        public virtual void TestSingleNonMergeableSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3, true);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the index still contains a single segment.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
        }
Example #10
        public virtual void TestOneLargeOneSmall()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);
            AddDocs(writer, 5);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(4, sis.Size());
        }
Example #11
        public virtual void TestSingleMergeableTooLargeSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 5, true);

            // delete the last document

            writer.DeleteDocuments(new Term("id", "4"));
            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the too-large segment was left alone and still has its deletion.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
            Assert.IsTrue(sis.Info(0).HasDeletions());
        }
Example #12
 internal ReaderCommit(SegmentInfos infos, Directory dir)
 {
     SegmentsFileName_Renamed = infos.SegmentsFileName;
     this.Dir             = dir;
     UserData_Renamed     = infos.UserData;
     Files                = infos.Files(dir, true);
     Generation_Renamed   = infos.Generation;
     SegmentCount_Renamed = infos.Size();
 }
Example #13
 public CommitPoint(ICollection <CommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos)
 {
     this.Directory_Renamed   = directory;
     this.CommitsToDelete     = commitsToDelete;
     UserData_Renamed         = segmentInfos.UserData;
     SegmentsFileName_Renamed = segmentInfos.SegmentsFileName;
     Generation_Renamed       = segmentInfos.Generation;
     Files = segmentInfos.Files(directory, true);
     SegmentCount_Renamed = segmentInfos.Size();
 }
Example #14
        public virtual void TestMaxNumSegments2([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            Directory dir = NewDirectory();

            Document doc = new Document();

            doc.Add(NewStringField("content", "aaa", Field.Store.NO));

            LogDocMergePolicy ldmp = new LogDocMergePolicy();

            ldmp.MinMergeDocs = 1;
            ldmp.MergeFactor  = 4;
            var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(2)
                         .SetMergePolicy(ldmp)
                         .SetMergeScheduler(scheduler);
            IndexWriter writer = new IndexWriter(dir, config);

            for (int iter = 0; iter < 10; iter++)
            {
                for (int i = 0; i < 19; i++)
                {
                    writer.AddDocument(doc);
                }

                writer.Commit();
                writer.WaitForMerges();
                writer.Commit();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);

                int segCount = sis.Size();
                writer.ForceMerge(7);
                writer.Commit();
                writer.WaitForMerges();

                sis = new SegmentInfos();
                sis.Read(dir);
                int optSegCount = sis.Size();

                if (segCount < 7)
                {
                    Assert.AreEqual(segCount, optSegCount);
                }
                else
                {
                    Assert.AreEqual(7, optSegCount, "seg: " + segCount);
                }
            }
            writer.Dispose();
            dir.Dispose();
        }
Example #15
        public virtual void TestAddIndexes()
        {
            Directory   dir1   = NewDirectory();
            Directory   dir2   = NewDirectory();
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d1 = new Document();

            d1.Add(new TextField("f1", "first field", Field.Store.YES));
            d1.Add(new TextField("f2", "second field", Field.Store.YES));
            writer.AddDocument(d1);

            writer.Dispose();
            writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document  d2          = new Document();
            FieldType customType2 = new FieldType(TextField.TYPE_STORED);

            customType2.StoreTermVectors = true;
            d2.Add(new TextField("f2", "second field", Field.Store.YES));
            d2.Add(new Field("f1", "first field", customType2));
            d2.Add(new TextField("f3", "third field", Field.Store.YES));
            d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
            writer.AddDocument(d2);

            writer.Dispose();

            writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            writer.AddIndexes(dir2);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir1);
            Assert.AreEqual(2, sis.Size());

            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            // make sure the ordering of the "external" segment is preserved
            Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

            dir1.Dispose();
            dir2.Dispose();
        }
        private int CheckAllSegmentsUpgraded(Directory dir)
        {
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            if (VERBOSE)
            {
                Console.WriteLine("checkAllSegmentsUpgraded: " + infos);
            }
            foreach (SegmentCommitInfo si in infos.Segments)
            {
                Assert.AreEqual(Constants.LUCENE_MAIN_VERSION, si.Info.Version);
            }
            return(infos.Size());
        }
Example #18
        public virtual void TestPartialMerge()
        {
            Directory dir = NewDirectory();

            Document doc = new Document();

            doc.Add(NewStringField("content", "aaa", Field.Store.NO));
            int incrMin = TEST_NIGHTLY ? 15 : 40;

            for (int numDocs = 10; numDocs < 500; numDocs += TestUtil.NextInt(Random(), incrMin, 5 * incrMin))
            {
                LogDocMergePolicy ldmp = new LogDocMergePolicy();
                ldmp.MinMergeDocs = 1;
                ldmp.MergeFactor  = 5;
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(ldmp));
                for (int j = 0; j < numDocs; j++)
                {
                    writer.AddDocument(doc);
                }
                writer.Dispose();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);
                int segCount = sis.Size();

                ldmp             = new LogDocMergePolicy();
                ldmp.MergeFactor = 5;
                writer           = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(ldmp));
                writer.ForceMerge(3);
                writer.Dispose();

                sis = new SegmentInfos();
                sis.Read(dir);
                int optSegCount = sis.Size();

                if (segCount < 3)
                {
                    Assert.AreEqual(segCount, optSegCount);
                }
                else
                {
                    Assert.AreEqual(3, optSegCount);
                }
            }
            dir.Dispose();
        }
Example #20
        public virtual void TestBackgroundForceMerge()
        {
            Directory dir = NewDirectory();

            for (int pass = 0; pass < 2; pass++)
            {
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(51)));
                Document    doc    = new Document();
                doc.Add(NewStringField("field", "aaa", Field.Store.NO));
                for (int i = 0; i < 100; i++)
                {
                    writer.AddDocument(doc);
                }
                writer.ForceMerge(1, false);

                if (0 == pass)
                {
                    writer.Dispose();
                    DirectoryReader reader = DirectoryReader.Open(dir);
                    Assert.AreEqual(1, reader.Leaves.Count);
                    reader.Dispose();
                }
                else
                {
                    // Get another segment to flush so we can verify it is
                    // NOT included in the merging
                    writer.AddDocument(doc);
                    writer.AddDocument(doc);
                    writer.Dispose();

                    DirectoryReader reader = DirectoryReader.Open(dir);
                    Assert.IsTrue(reader.Leaves.Count > 1);
                    reader.Dispose();

                    SegmentInfos infos = new SegmentInfos();
                    infos.Read(dir);
                    Assert.AreEqual(2, infos.Size());
                }
            }

            dir.Dispose();
        }
Example #22
        /// <summary>
        /// Returns true if the number of segments eligible for
        ///  merging is less than or equal to the specified <c>maxNumSegments</c>.
        /// </summary>
        protected internal virtual bool IsMerged(SegmentInfos infos, int maxNumSegments, IDictionary <SegmentCommitInfo, bool?> segmentsToMerge)
        {
            int numSegments             = infos.Size();
            int numToMerge              = 0;
            SegmentCommitInfo mergeInfo = null;
            bool segmentIsOriginal      = false;

            for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                bool? isOriginal;
                segmentsToMerge.TryGetValue(info, out isOriginal);
                if (isOriginal != null)
                {
                    segmentIsOriginal = isOriginal.Value;
                    numToMerge++;
                    mergeInfo = info;
                }
            }

            return(numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || IsMerged(infos, mergeInfo)));
        }
Example #23
        public virtual void TestByteSizeLimit()
        {
            // tests that the max merge size constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf        = NewWriterConfig();
            IndexWriter       writer      = new IndexWriter(dir, conf);
            const int         numSegments = 15;

            for (int i = 0; i < numSegments; i++)
            {
                int numDocs = i == 7 ? 30 : 1;
                AddDocs(writer, numDocs);
            }
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            double min = sis.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();

            lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
            sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
        public virtual void TestSameFieldNumbersAcrossSegments()
        {
            for (int i = 0; i < 2; i++)
            {
                Directory dir = NewDirectory();
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

                Document d1 = new Document();
                d1.Add(new StringField("f1", "first field", Field.Store.YES));
                d1.Add(new StringField("f2", "second field", Field.Store.YES));
                writer.AddDocument(d1);

                if (i == 1)
                {
                    writer.Dispose();
                    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
                }
                else
                {
                    writer.Commit();
                }

                Document d2 = new Document();
                FieldType customType2 = new FieldType(TextField.TYPE_STORED);
                customType2.StoreTermVectors = true;
                d2.Add(new TextField("f2", "second field", Field.Store.NO));
                d2.Add(new Field("f1", "first field", customType2));
                d2.Add(new TextField("f3", "third field", Field.Store.NO));
                d2.Add(new TextField("f4", "fourth field", Field.Store.NO));
                writer.AddDocument(d2);

                writer.Dispose();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);
                Assert.AreEqual(2, sis.Size());

                FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

                Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis2.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

                writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                writer.ForceMerge(1);
                writer.Dispose();

                sis = new SegmentInfos();
                sis.Read(dir);
                Assert.AreEqual(1, sis.Size());

                FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(0));

                Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
                Assert.AreEqual("f4", fis3.FieldInfo(3).Name);

                dir.Dispose();
            }
        }
        /// <summary>
        /// This method is only used by <seealso cref="#doOpenIfChanged(SegmentInfos)"/>. </summary>
        private static DirectoryReader Open(Directory directory, SegmentInfos infos, IList<AtomicReader> oldReaders, int termInfosIndexDivisor)
        {
            // we put the old SegmentReaders in a map, that allows us
            // to lookup a reader using its segment name
            IDictionary<string, int?> segmentReaders = new Dictionary<string, int?>();

            if (oldReaders != null)
            {
                // create a Map SegmentName->SegmentReader
                for (int i = 0, c = oldReaders.Count; i < c; i++)
                {
                    SegmentReader sr = (SegmentReader)oldReaders[i];
                    segmentReaders[sr.SegmentName] = Convert.ToInt32(i);
                }
            }

            SegmentReader[] newReaders = new SegmentReader[infos.Size()];

            // remember which readers are shared between the old and the re-opened
            // DirectoryReader - we have to incRef those readers
            bool[] readerShared = new bool[infos.Size()];

            for (int i = infos.Size() - 1; i >= 0; i--)
            {
                // find SegmentReader for this segment
                int? oldReaderIndex;
                segmentReaders.TryGetValue(infos.Info(i).Info.Name, out oldReaderIndex);
                if (oldReaderIndex == null)
                {
                    // this is a new segment, no old SegmentReader can be reused
                    newReaders[i] = null;
                }
                else
                {
                    // there is an old reader for this segment - we'll try to reopen it
                    newReaders[i] = (SegmentReader)oldReaders[(int)oldReaderIndex];
                }

                bool success = false;
                Exception prior = null;
                try
                {
                    SegmentReader newReader;
                    if (newReaders[i] == null || infos.Info(i).Info.UseCompoundFile != newReaders[i].SegmentInfo.Info.UseCompoundFile)
                    {
                        // this is a new reader; in case we hit an exception we can close it safely
                        newReader = new SegmentReader(infos.Info(i), termInfosIndexDivisor, IOContext.READ);
                        readerShared[i] = false;
                        newReaders[i] = newReader;
                    }
                    else
                    {
                        if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen && newReaders[i].SegmentInfo.FieldInfosGen == infos.Info(i).FieldInfosGen)
                        {
                            // No change; this reader will be shared between
                            // the old and the new one, so we must incRef
                            // it:
                            readerShared[i] = true;
                            newReaders[i].IncRef();
                        }
                        else
                        {
                            // there are changes to the reader, either liveDocs or DV updates
                            readerShared[i] = false;
                            // Steal the ref returned by SegmentReader ctor:
                            Debug.Assert(infos.Info(i).Info.Dir == newReaders[i].SegmentInfo.Info.Dir);
                            Debug.Assert(infos.Info(i).HasDeletions() || infos.Info(i).HasFieldUpdates());
                            if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen)
                            {
                                // only DV updates
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i], newReaders[i].LiveDocs, newReaders[i].NumDocs);
                            }
                            else
                            {
                                // both DV and liveDocs have changed
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i]);
                            }
                        }
                    }
                    success = true;
                }
                catch (Exception ex)
                {
                    prior = ex;
                }
                finally
                {
                    if (!success)
                    {
                        for (i++; i < infos.Size(); i++)
                        {
                            if (newReaders[i] != null)
                            {
                                try
                                {
                                    if (!readerShared[i])
                                    {
                                        // this is a new subReader that is not used by the old one,
                                        // we can close it
                                        newReaders[i].Dispose();
                                    }
                                    else
                                    {
                                        // this subReader is also used by the old reader, so instead
                                        // closing we must decRef it
                                        newReaders[i].DecRef();
                                    }
                                }
                                catch (Exception t)
                                {
                                    if (prior == null)
                                    {
                                        prior = t;
                                    }
                                }
                            }
                        }
                    }
                    // throw the first exception
                    IOUtils.ReThrow(prior);
                }
            }
            return new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false);
        }
        public virtual void TestMaxNumSegments2()
        {
            Directory dir = NewDirectory();

            Document doc = new Document();
            doc.Add(NewStringField("content", "aaa", Field.Store.NO));

            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.MinMergeDocs = 1;
            ldmp.MergeFactor = 4;
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetMergePolicy(ldmp).SetMergeScheduler(new ConcurrentMergeScheduler()));

            for (int iter = 0; iter < 10; iter++)
            {
                for (int i = 0; i < 19; i++)
                {
                    writer.AddDocument(doc);
                }

                writer.Commit();
                writer.WaitForMerges();
                writer.Commit();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);

                int segCount = sis.Size();
                writer.ForceMerge(7);
                writer.Commit();
                writer.WaitForMerges();

                sis = new SegmentInfos();
                sis.Read(dir);
                int optSegCount = sis.Size();

                if (segCount < 7)
                {
                    Assert.AreEqual(segCount, optSegCount);
                }
                else
                {
                    Assert.AreEqual(7, optSegCount, "seg: " + segCount);
                }
            }
            writer.Dispose();
            dir.Dispose();
        }
        public virtual void TestFieldNumberGaps()
        {
            int numIters = AtLeast(13);
            for (int i = 0; i < numIters; i++)
            {
                Directory dir = NewDirectory();
                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES));
                    Document d = new Document();
                    d.Add(new TextField("f1", "d1 first field", Field.Store.YES));
                    d.Add(new TextField("f2", "d1 second field", Field.Store.YES));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(1, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    Document d = new Document();
                    d.Add(new TextField("f1", "d2 first field", Field.Store.YES));
                    d.Add(new StoredField("f3", new byte[] { 1, 2, 3 }));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(2, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                    Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                    Assert.IsNull(fis2.FieldInfo(1));
                    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    Document d = new Document();
                    d.Add(new TextField("f1", "d3 first field", Field.Store.YES));
                    d.Add(new TextField("f2", "d3 second field", Field.Store.YES));
                    d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(3, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
                    FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                    Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                    Assert.IsNull(fis2.FieldInfo(1));
                    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                    Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
                    Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    writer.DeleteDocuments(new Term("f1", "d1"));
                    // nuke the first segment entirely so that the segment with gaps is
                    // loaded first!
                    writer.ForceMergeDeletes();
                    writer.Dispose();
                }

                IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream()));
                writer_.ForceMerge(1);
                writer_.Dispose();

                SegmentInfos sis_ = new SegmentInfos();
                sis_.Read(dir);
                Assert.AreEqual(1, sis_.Size());
                FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0));
                Assert.AreEqual("f1", fis1_.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis1_.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis1_.FieldInfo(2).Name);
                dir.Dispose();
            }
        }
Example #30
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Size();

            IList <SegmentReader> readers = new List <SegmentReader>();
            Directory             dir     = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int          infosUpto    = 0;
            bool         success      = false;

            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    Debug.Assert(info.Info.Dir == dir);
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return(result);
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
                        catch (Exception th)
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
Example #31
        /// <summary>
        /// Returns the merges necessary to merge the index down
        ///  to a specified number of segments.
        ///  This respects the <seealso cref="#maxMergeSizeForForcedMerge"/> setting.
        ///  By default, and assuming <c>maxNumSegments=1</c>, only
        ///  one segment will be left in the index, where that segment
        ///  has no deletions pending nor separate norms, and it is in
        ///  compound file format if the current useCompoundFile
        ///  setting is true.  This method returns multiple merges
        ///  (mergeFactor at a time) so the <seealso cref="MergeScheduler"/>
        ///  in use may make use of concurrency.
        /// </summary>
        public override MergeSpecification FindForcedMerges(SegmentInfos infos, int maxNumSegments, IDictionary <SegmentCommitInfo, bool?> segmentsToMerge)
        {
            Debug.Assert(maxNumSegments > 0);
            if (Verbose())
            {
                Message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge=" + segmentsToMerge);
            }

            // If the segments are already merged (e.g. there's only 1 segment), or
            // there are fewer than maxNumSegments segments, there is nothing to do:
            if (IsMerged(infos, maxNumSegments, segmentsToMerge))
            {
                if (Verbose())
                {
                    Message("already merged; skip");
                }
                return(null);
            }

            // Find the newest (rightmost) segment that needs to
            // be merged (other segments may have been flushed
            // since merging started):
            int last = infos.Size();

            while (last > 0)
            {
                SegmentCommitInfo info = infos.Info(--last);
                if (segmentsToMerge.ContainsKey(info))
                {
                    last++;
                    break;
                }
            }

            if (last == 0)
            {
                if (Verbose())
                {
                    Message("last == 0; skip");
                }
                return(null);
            }

            // There is only one segment already, and it is merged
            if (maxNumSegments == 1 && last == 1 && IsMerged(infos, infos.Info(0)))
            {
                if (Verbose())
                {
                    Message("already 1 seg; skip");
                }
                return(null);
            }

            // Check if there are any segments above the threshold
            bool anyTooLarge = false;

            for (int i = 0; i < last; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                if (Size(info) > MaxMergeSizeForForcedMerge || SizeDocs(info) > MaxMergeDocs_Renamed)
                {
                    anyTooLarge = true;
                    break;
                }
            }

            if (anyTooLarge)
            {
                return(FindForcedMergesSizeLimit(infos, maxNumSegments, last));
            }
            else
            {
                return(FindForcedMergesMaxNumSegments(infos, maxNumSegments, last));
            }
        }
Example #36
        /// <summary>
        /// Checks if any merges are now necessary and returns a
        ///  <seealso cref="MergePolicy.MergeSpecification"/> if so.  A merge
        ///  is necessary when there are more than <seealso cref="#setMergeFactor"/>
        ///  segments at a given level.  When multiple levels have too many
        ///  segments, this method will return multiple merges, allowing the
        ///  <seealso cref="MergeScheduler"/> to use concurrency.
        /// </summary>
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos infos)
        {
            int numSegments = infos.Size();

            if (Verbose())
            {
                Message("findMerges: " + numSegments + " segments");
            }

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            IList <SegmentInfoAndLevel> levels = new List <SegmentInfoAndLevel>();
            var norm = (float)Math.Log(MergeFactor_Renamed);

            ICollection <SegmentCommitInfo> mergingSegments = Writer.Get().MergingSegments;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                long size = Size(info);

                // Floor tiny segments
                if (size < 1)
                {
                    size = 1;
                }

                SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float)Math.Log(size) / norm, i);
                levels.Add(infoLevel);

                if (Verbose())
                {
                    long   segBytes = SizeBytes(info);
                    string extra    = mergingSegments.Contains(info) ? " [merging]" : "";
                    if (size >= MaxMergeSize)
                    {
                        extra += " [skip: too large]";
                    }
                    Message("seg=" + Writer.Get().SegString(info) + " level=" + infoLevel.Level + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00} MB", segBytes / 1024 / 1024.0) + extra);
                }
            }

            float levelFloor;

            if (MinMergeSize <= 0)
            {
                levelFloor = (float)0.0;
            }
            else
            {
                levelFloor = (float)(Math.Log(MinMergeSize) / norm);
            }

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
            // LEVEL_LOG_SPAN of the max size, or, who has such as
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int numMergeableSegments = levels.Count;

            int start = 0;

            while (start < numMergeableSegments)
            {
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start].Level;
                for (int i = 1 + start; i < numMergeableSegments; i++)
                {
                    float level = levels[i].Level;
                    if (level > maxLevel)
                    {
                        maxLevel = level;
                    }
                }

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                if (maxLevel <= levelFloor)
                {
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                }
                else
                {
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    {
                        levelBottom = levelFloor;
                    }
                }

                int upto = numMergeableSegments - 1;
                while (upto >= start)
                {
                    if (levels[upto].Level >= levelBottom)
                    {
                        break;
                    }
                    upto--;
                }
                if (Verbose())
                {
                    Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
                }

                // Finally, record all merges that are viable at this level:
                int end = start + MergeFactor_Renamed;
                while (end <= 1 + upto)
                {
                    bool anyTooLarge = false;
                    bool anyMerging  = false;
                    for (int i = start; i < end; i++)
                    {
                        SegmentCommitInfo info = levels[i].Info;
                        anyTooLarge |= (Size(info) >= MaxMergeSize || SizeDocs(info) >= MaxMergeDocs_Renamed);
                        if (mergingSegments.Contains(info))
                        {
                            anyMerging = true;
                            break;
                        }
                    }

                    if (anyMerging)
                    {
                        // skip
                    }
                    else if (!anyTooLarge)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        IList<SegmentCommitInfo> mergeInfos = new List<SegmentCommitInfo>();
                        for (int i = start; i < end; i++)
                        {
                            mergeInfos.Add(levels[i].Info);
                            Debug.Assert(infos.Contains(levels[i].Info));
                        }
                        if (Verbose())
                        {
                            Message("  add merge=" + Writer.Get().SegString(mergeInfos) + " start=" + start + " end=" + end);
                        }
                        spec.Add(new OneMerge(mergeInfos));
                    }
                    else if (Verbose())
                    {
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                    }

                    start = end;
                    end   = start + MergeFactor_Renamed;
                }

                start = 1 + upto;
            }

            return(spec);
        }
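The level assigned to each segment above is just the logarithm of its size in base MergeFactor, with tiny segments floored to 1. A standalone sketch of that single step (hypothetical helper, not part of LogMergePolicy):

    private static float SegmentLevel(long size, int mergeFactor)
    {
        // Floor tiny segments, exactly as the loop above does.
        if (size < 1)
        {
            size = 1;
        }
        // log base mergeFactor of the size: ln(size) / ln(mergeFactor)
        return (float)(Math.Log(size) / Math.Log(mergeFactor));
    }

With mergeFactor = 10, segments of 9, 90 and 900 units land on levels of roughly 0.95, 1.95 and 2.95, so each ends up in a different level bucket.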
Example #37
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos infos)
        {
            if (Verbose())
            {
                Message("findMerges: " + infos.Size() + " segments");
            }
            if (infos.Size() == 0)
            {
                return(null);
            }
            ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
            ICollection<SegmentCommitInfo> toBeMerged = new HashSet<SegmentCommitInfo>();

            List<SegmentCommitInfo> infosSorted = new List<SegmentCommitInfo>(infos.AsList());

            infosSorted.Sort(new SegmentByteSizeDescending(this));

            // Compute total index bytes & print details about the index
            long totIndexBytes   = 0;
            long minSegmentBytes = long.MaxValue;

            foreach (SegmentCommitInfo info in infosSorted)
            {
                long segBytes = Size(info);
                if (Verbose())
                {
                    string extra = merging.Contains(info) ? " [merging]" : "";
                    if (segBytes >= MaxMergedSegmentBytes / 2.0)
                    {
                        extra += " [skip: too large]";
                    }
                    else if (segBytes < FloorSegmentBytes)
                    {
                        extra += " [floored]";
                    }
                    Message("  seg=" + Writer.Get().SegString(info) + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00}", segBytes / 1024 / 1024.0) + " MB" + extra);
                }

                minSegmentBytes = Math.Min(segBytes, minSegmentBytes);
                // Accum total byte size
                totIndexBytes += segBytes;
            }

            // If we have too-large segments, grace them out
            // of the maxSegmentCount:
            int tooBigCount = 0;

            while (tooBigCount < infosSorted.Count && Size(infosSorted[tooBigCount]) >= MaxMergedSegmentBytes / 2.0)
            {
                totIndexBytes -= Size(infosSorted[tooBigCount]);
                tooBigCount++;
            }

            minSegmentBytes = FloorSize(minSegmentBytes);

            // Compute max allowed segs in the index
            long   levelSize       = minSegmentBytes;
            long   bytesLeft       = totIndexBytes;
            double allowedSegCount = 0;

            while (true)
            {
                double segCountLevel = bytesLeft / (double)levelSize;
                if (segCountLevel < SegsPerTier)
                {
                    allowedSegCount += Math.Ceiling(segCountLevel);
                    break;
                }
                allowedSegCount += SegsPerTier;
                bytesLeft       -= (long)(SegsPerTier * levelSize);
                levelSize       *= MaxMergeAtOnce_Renamed;
            }
            int allowedSegCountInt = (int)allowedSegCount;

            MergeSpecification spec = null;

            // Cycle to possibly select more than one merge:
            while (true)
            {
                long mergingBytes = 0;

                // Gather eligible segments for merging, ie segments
                // not already being merged and not already picked (by
                // prior iteration of this loop) for merging:
                IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
                for (int idx = tooBigCount; idx < infosSorted.Count; idx++)
                {
                    SegmentCommitInfo info = infosSorted[idx];
                    if (merging.Contains(info))
                    {
                        mergingBytes += info.SizeInBytes();
                    }
                    else if (!toBeMerged.Contains(info))
                    {
                        eligible.Add(info);
                    }
                }

                bool maxMergeIsRunning = mergingBytes >= MaxMergedSegmentBytes;

                if (Verbose())
                {
                    Message("  allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.Count + " (eligible count=" + eligible.Count + ") tooBigCount=" + tooBigCount);
                }

                if (eligible.Count == 0)
                {
                    return(spec);
                }

                if (eligible.Count >= allowedSegCountInt)
                {
                    // OK we are over budget -- find best merge!
                    MergeScore bestScore           = null;
                    IList<SegmentCommitInfo> best = null;
                    bool bestTooLarge   = false;
                    long bestMergeBytes = 0;

                    // Consider all merge starts:
                    for (int startIdx = 0; startIdx <= eligible.Count - MaxMergeAtOnce_Renamed; startIdx++)
                    {
                        long totAfterMergeBytes = 0;

                        IList<SegmentCommitInfo> candidate = new List<SegmentCommitInfo>();
                        bool hitTooLarge = false;
                        for (int idx = startIdx; idx < eligible.Count && candidate.Count < MaxMergeAtOnce_Renamed; idx++)
                        {
                            SegmentCommitInfo info = eligible[idx];
                            long segBytes          = Size(info);

                            if (totAfterMergeBytes + segBytes > MaxMergedSegmentBytes)
                            {
                                hitTooLarge = true;
                                // NOTE: we continue, so that we can try
                                // "packing" smaller segments into this merge
                                // to see if we can get closer to the max
                                // size; this in general is not perfect since
                                // this is really "bin packing" and we'd have
                                // to try different permutations.
                                continue;
                            }
                            candidate.Add(info);
                            totAfterMergeBytes += segBytes;
                        }

                        MergeScore score = Score(candidate, hitTooLarge, mergingBytes);
                        if (Verbose())
                        {
                            Message("  maybe=" + Writer.Get().SegString(candidate) + " score=" + score.Score + " " + score.Explanation + " tooLarge=" + hitTooLarge + " size=" + string.Format(CultureInfo.InvariantCulture, "%.3f MB", totAfterMergeBytes / 1024.0 / 1024.0));
                        }

                        // If we are already running a max sized merge
                        // (maxMergeIsRunning), don't allow another max
                        // sized merge to kick off:
                        if ((bestScore == null || score.Score < bestScore.Score) && (!hitTooLarge || !maxMergeIsRunning))
                        {
                            best           = candidate;
                            bestScore      = score;
                            bestTooLarge   = hitTooLarge;
                            bestMergeBytes = totAfterMergeBytes;
                        }
                    }

                    if (best != null)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        OneMerge merge = new OneMerge(best);
                        spec.Add(merge);
                        foreach (SegmentCommitInfo info in merge.Segments)
                        {
                            toBeMerged.Add(info);
                        }

                        if (Verbose())
                        {
                            Message("  add merge=" + Writer.Get().SegString(merge.Segments) + " size=" + string.Format(CultureInfo.InvariantCulture, "%.3f MB", bestMergeBytes / 1024.0 / 1024.0) + " score=" + string.Format(CultureInfo.InvariantCulture, "%.3f", bestScore.Score) + " " + bestScore.Explanation + (bestTooLarge ? " [max merge]" : ""));
                        }
                    }
                    else
                    {
                        return(spec);
                    }
                }
                else
                {
                    return(spec);
                }
            }
        }
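Earlier in this method, the allowed segment count is computed as a budget: each tier may hold SegsPerTier segments, and the per-segment size of successive tiers grows by MaxMergeAtOnce until the remaining index bytes are used up. A self-contained sketch of that calculation with hypothetical inputs:

    private static double AllowedSegmentCount(long totIndexBytes, long minSegmentBytes,
                                              double segsPerTier, int maxMergeAtOnce)
    {
        long levelSize = minSegmentBytes;   // per-segment size on the current tier
        long bytesLeft = totIndexBytes;     // bytes not yet assigned to a tier
        double allowed = 0;
        while (true)
        {
            double segCountLevel = bytesLeft / (double)levelSize;
            if (segCountLevel < segsPerTier)
            {
                allowed += Math.Ceiling(segCountLevel);
                break;
            }
            allowed += segsPerTier;
            bytesLeft -= (long)(segsPerTier * levelSize);
            levelSize *= maxMergeAtOnce;
        }
        return allowed;
    }

For example, AllowedSegmentCount(100L << 20, 2L << 20, 10, 10) returns 14: ten floor-sized segments on the first tier plus four larger ones on the next, after which FindMerges starts selecting merges.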
Example #38
        /// <summary>
        /// Returns a <seealso cref="Status"/> instance detailing
        ///  the state of the index.
        /// </summary>
        ///  <param name="onlySegments"> list of specific segment names to check
        ///
        ///  <p>As this method checks every byte in the specified
        ///  segments, on a large index it can take quite a long
        ///  time to run.
        ///
        ///  <p><b>WARNING</b>: make sure
        ///  you only call this when the index is not opened by any
        ///  writer.  </param>
        public virtual Status DoCheckIndex(IList<string> onlySegments)
        {
            NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();
            Status result = new Status();
            result.Dir = Dir;
            try
            {
                sis.Read(Dir);
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not read any segments file in directory");
                result.MissingSegments = true;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                    //infoStream.WriteLine(t.StackTrace);
                }
                return result;
            }

            // find the oldest and newest segment versions
            string oldest = Convert.ToString(int.MaxValue), newest = Convert.ToString(int.MinValue);
            string oldSegs = null;
            bool foundNonNullVersion = false;
            IComparer<string> versionComparator = StringHelper.VersionComparator;
            foreach (SegmentCommitInfo si in sis.Segments)
            {
                string version = si.Info.Version;
                if (version == null)
                {
                    // pre-3.1 segment
                    oldSegs = "pre-3.1";
                }
                else
                {
                    foundNonNullVersion = true;
                    if (versionComparator.Compare(version, oldest) < 0)
                    {
                        oldest = version;
                    }
                    if (versionComparator.Compare(version, newest) > 0)
                    {
                        newest = version;
                    }
                }
            }

            int numSegments = sis.Size();
            string segmentsFileName = sis.SegmentsFileName;
            // note: we only read the format byte (required preamble) here!
            IndexInput input = null;
            try
            {
                input = Dir.OpenInput(segmentsFileName, IOContext.READONCE);
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                    //infoStream.WriteLine(t.StackTrace);
                }
                result.CantOpenSegments = true;
                return result;
            }
            int format = 0;
            try
            {
                format = input.ReadInt();
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                    //infoStream.WriteLine(t.StackTrace);
                }
                result.MissingSegmentVersion = true;
                return result;
            }
            finally
            {
                if (input != null)
                {
                    input.Dispose();
                }
            }

            string sFormat = "";
            bool skip = false;

            result.SegmentsFileName = segmentsFileName;
            result.NumSegments = numSegments;
            result.UserData = sis.UserData;
            string userDataString;
            if (sis.UserData.Count > 0)
            {
                userDataString = " userData=" + sis.UserData;
            }
            else
            {
                userDataString = "";
            }

            string versionString = null;
            if (oldSegs != null)
            {
                if (foundNonNullVersion)
                {
                    versionString = "versions=[" + oldSegs + " .. " + newest + "]";
                }
                else
                {
                    versionString = "version=" + oldSegs;
                }
            }
            else
            {
                versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
            }

            Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.Partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                    foreach (string s in onlySegments)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.SegmentsChecked.AddRange(onlySegments);
                Msg(infoStream, ":");
            }

            if (skip)
            {
                Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.ToolOutOfDate = true;
                return result;
            }

            result.NewSegments = (SegmentInfos)sis.Clone();
            result.NewSegments.Clear();
            result.MaxSegmentName = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentCommitInfo info = sis.Info(i);
                int segmentName = 0;
                try
                {
                    segmentName = int.Parse /*Convert.ToInt32*/(info.Info.Name.Substring(1));
                }
                catch
                {
                }
                if (segmentName > result.MaxSegmentName)
                {
                    result.MaxSegmentName = segmentName;
                }
                if (onlySegments != null && !onlySegments.Contains(info.Info.Name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.SegmentInfos.Add(segInfoStat);
                Msg(infoStream, "  " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount);
                segInfoStat.Name = info.Info.Name;
                segInfoStat.DocCount = info.Info.DocCount;

                string version = info.Info.Version;
                if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0)
                {
                    throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount);
                }

                int toLoseDocCount = info.Info.DocCount;

                AtomicReader reader = null;

                try
                {
                    Codec codec = info.Info.Codec;
                    Msg(infoStream, "    codec=" + codec);
                    segInfoStat.Codec = codec;
                    Msg(infoStream, "    compound=" + info.Info.UseCompoundFile);
                    segInfoStat.Compound = info.Info.UseCompoundFile;
                    Msg(infoStream, "    numFiles=" + info.Files().Count);
                    segInfoStat.NumFiles = info.Files().Count;
                    segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null)
                    {
                        // don't print size in bytes if it's a 3.0 segment with shared docstores
                        Msg(infoStream, "    size (MB)=" + segInfoStat.SizeMB.ToString(nf));
                    }
                    IDictionary<string, string> diagnostics = info.Info.Diagnostics;
                    segInfoStat.Diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg(infoStream, "    diagnostics = " + diagnostics);
                    }

                    if (!info.HasDeletions())
                    {
                        Msg(infoStream, "    no deletions");
                        segInfoStat.HasDeletions = false;
                    }
                    else
                    {
                        Msg(infoStream, "    has deletions [delGen=" + info.DelGen + "]");
                        segInfoStat.HasDeletions = true;
                        segInfoStat.DeletionsGen = info.DelGen;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
                    Msg(infoStream, "OK");

                    segInfoStat.OpenReaderPassed = true;

                    if (infoStream != null)
                    {
                        infoStream.Write("    test: check integrity.....");
                    }
                    reader.CheckIntegrity();
                    Msg(infoStream, "OK");

                    if (infoStream != null)
                    {
                        infoStream.Write("    test: check live docs.....");
                    }
                    int numDocs = reader.NumDocs;
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        if (reader.NumDocs != info.Info.DocCount - info.DelCount)
                        {
                            throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs);
                        }
                        if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc)
                        {
                            throw new Exception("too many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs));
                        }
                        if (info.Info.DocCount - numDocs != info.DelCount)
                        {
                            throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                        }
                        Bits liveDocs = reader.LiveDocs;
                        if (liveDocs == null)
                        {
                            throw new Exception("segment should have deletions, but liveDocs is null");
                        }
                        else
                        {
                            int numLive = 0;
                            for (int j = 0; j < liveDocs.Length(); j++)
                            {
                                if (liveDocs.Get(j))
                                {
                                    numLive++;
                                }
                            }
                            if (numLive != numDocs)
                            {
                                throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
                            }
                        }

                        segInfoStat.NumDeleted = info.Info.DocCount - numDocs;
                        Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.DelCount != 0)
                        {
                            throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                        }
                        Bits liveDocs = reader.LiveDocs;
                        if (liveDocs != null)
                        {
                            // it's ok for it to be non-null here, as long as none are set, right?
                            for (int j = 0; j < liveDocs.Length(); j++)
                            {
                                if (!liveDocs.Get(j))
                                {
                                    throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
                                }
                            }
                        }
                        Msg(infoStream, "OK");
                    }
                    if (reader.MaxDoc != info.Info.DocCount)
                    {
                        throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount);
                    }

                    // Test getFieldInfos()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    FieldInfos fieldInfos = reader.FieldInfos;
                    Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]");
                    segInfoStat.NumFields = fieldInfos.Size();

                    // Test Field Norms
                    segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream);

                    // Test the Term Index
                    segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose);

                    // Test Stored Fields
                    segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream);

                    // Test Term Vectors
                    segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed);

                    segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream);

                    // Rethrow the first exception we encountered
                    //  this will cause stats for failed segments to be incremented properly
                    if (segInfoStat.FieldNormStatus.Error != null)
                    {
                        throw new Exception("Field Norm test failed");
                    }
                    else if (segInfoStat.TermIndexStatus.Error != null)
                    {
                        throw new Exception("Term Index test failed");
                    }
                    else if (segInfoStat.StoredFieldStatus.Error != null)
                    {
                        throw new Exception("Stored Field test failed");
                    }
                    else if (segInfoStat.TermVectorStatus.Error != null)
                    {
                        throw new Exception("Term Vector test failed");
                    }
                    else if (segInfoStat.DocValuesStatus.Error != null)
                    {
                        throw new Exception("DocValues test failed");
                    }

                    Msg(infoStream, "");
                }
                catch (Exception t)
                {
                    Msg(infoStream, "FAILED");
                    string comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg(infoStream, "    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        // LUCENENET NOTE: Some tests rely on the error type being in
                        // the message. We can't get the error type with StackTrace, we
                        // need ToString() for that.
                        infoStream.WriteLine(t.ToString());
                        //infoStream.WriteLine(t.StackTrace);
                    }
                    Msg(infoStream, "");
                    result.TotLoseDocCount += toLoseDocCount;
                    result.NumBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Dispose();
                    }
                }

                // Keeper
                result.NewSegments.Add((SegmentCommitInfo)info.Clone());
            }

            if (0 == result.NumBadSegments)
            {
                result.Clean = true;
            }
            else
            {
                Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected");
            }

            if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter)))
            {
                result.Clean = false;
                result.NewSegments.Counter = result.MaxSegmentName + 1;
                Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName);
            }

            if (result.Clean)
            {
                Msg(infoStream, "No problems were detected with this index.\n");
            }

            return result;
        }
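A hedged usage sketch for DoCheckIndex. The Status members come from the method body above; the FSDirectory path and the null argument meaning "check every segment" are assumptions:

    using (Directory dir = FSDirectory.Open(new DirectoryInfo("/path/to/index")))  // hypothetical path
    {
        CheckIndex checker = new CheckIndex(dir);
        CheckIndex.Status status = checker.DoCheckIndex(null);  // null = no segment filter
        Console.WriteLine("segments=" + status.NumSegments
            + " bad=" + status.NumBadSegments
            + " clean=" + status.Clean);
    }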
Example #39
        /// <summary>
        /// this constructor is only used for <seealso cref="DoOpenIfChanged(SegmentInfos)"/> </summary>
        private static DirectoryReader Open(Directory directory, SegmentInfos infos, IList<AtomicReader> oldReaders, int termInfosIndexDivisor)
        {
            // we put the old SegmentReaders in a map, that allows us
            // to lookup a reader using its segment name
            IDictionary<string, int?> segmentReaders = new Dictionary<string, int?>();

            if (oldReaders != null)
            {
                // create a Map SegmentName->SegmentReader
                for (int i = 0, c = oldReaders.Count; i < c; i++)
                {
                    SegmentReader sr = (SegmentReader)oldReaders[i];
                    segmentReaders[sr.SegmentName] = Convert.ToInt32(i);
                }
            }

            SegmentReader[] newReaders = new SegmentReader[infos.Size()];

            // remember which readers are shared between the old and the re-opened
            // DirectoryReader - we have to incRef those readers
            bool[] readerShared = new bool[infos.Size()];

            for (int i = infos.Size() - 1; i >= 0; i--)
            {
                // find SegmentReader for this segment
                int? oldReaderIndex;
                segmentReaders.TryGetValue(infos.Info(i).Info.Name, out oldReaderIndex);
                if (oldReaderIndex == null)
                {
                    // this is a new segment, no old SegmentReader can be reused
                    newReaders[i] = null;
                }
                else
                {
                    // there is an old reader for this segment - we'll try to reopen it
                    newReaders[i] = (SegmentReader)oldReaders[(int)oldReaderIndex];
                }

                bool      success = false;
                Exception prior   = null;
                try
                {
                    SegmentReader newReader;
                    if (newReaders[i] == null || infos.Info(i).Info.UseCompoundFile != newReaders[i].SegmentInfo.Info.UseCompoundFile)
                    {
                        // this is a new reader; in case we hit an exception we can close it safely
                        newReader       = new SegmentReader(infos.Info(i), termInfosIndexDivisor, IOContext.READ);
                        readerShared[i] = false;
                        newReaders[i]   = newReader;
                    }
                    else
                    {
                        if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen && newReaders[i].SegmentInfo.FieldInfosGen == infos.Info(i).FieldInfosGen)
                        {
                            // No change; this reader will be shared between
                            // the old and the new one, so we must incRef
                            // it:
                            readerShared[i] = true;
                            newReaders[i].IncRef();
                        }
                        else
                        {
                            // there are changes to the reader, either liveDocs or DV updates
                            readerShared[i] = false;
                            // Steal the ref returned by SegmentReader ctor:
                            Debug.Assert(infos.Info(i).Info.Dir == newReaders[i].SegmentInfo.Info.Dir);
                            Debug.Assert(infos.Info(i).HasDeletions() || infos.Info(i).HasFieldUpdates());
                            if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen)
                            {
                                // only DV updates
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i], newReaders[i].LiveDocs, newReaders[i].NumDocs);
                            }
                            else
                            {
                                // both DV and liveDocs have changed
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i]);
                            }
                        }
                    }
                    success = true;
                }
                catch (Exception ex)
                {
                    prior = ex;
                }
                finally
                {
                    if (!success)
                    {
                        for (i++; i < infos.Size(); i++)
                        {
                            if (newReaders[i] != null)
                            {
                                try
                                {
                                    if (!readerShared[i])
                                    {
                                        // this is a new subReader that is not used by the old one,
                                        // we can close it
                                        newReaders[i].Dispose();
                                    }
                                    else
                                    {
                                        // this subReader is also used by the old reader, so instead
                                        // closing we must decRef it
                                        newReaders[i].DecRef();
                                    }
                                }
                                catch (Exception t)
                                {
                                    if (prior == null)
                                    {
                                        prior = t;
                                    }
                                }
                            }
                        }
                    }
                    // throw the first exception
                    IOUtils.ReThrow(prior);
                }
            }
            return(new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false));
        }
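From the caller's side this reader reuse is invisible. A hedged sketch of the public reopen pattern that ends up in the method above; names follow the standard DirectoryReader API:

    DirectoryReader reader = DirectoryReader.Open(dir);              // dir: an existing index Directory
    // ... the index is modified by an IndexWriter elsewhere ...
    DirectoryReader reopened = DirectoryReader.OpenIfChanged(reader);
    if (reopened != null)
    {
        // Unchanged segments were shared (incRef'd); changed or new segments were opened fresh.
        reader.Dispose();
        reader = reopened;
    }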
Example #40
        public virtual void TestFieldNumberGaps()
        {
            int numIters = AtLeast(13);

            for (int i = 0; i < numIters; i++)
            {
                Directory dir = NewDirectory();
                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES));
                    Document    d      = new Document();
                    d.Add(new TextField("f1", "d1 first field", Field.Store.YES));
                    d.Add(new TextField("f2", "d1 second field", Field.Store.YES));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(1, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    Document    d      = new Document();
                    d.Add(new TextField("f1", "d2 first field", Field.Store.YES));
                    d.Add(new StoredField("f3", new byte[] { 1, 2, 3 }));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(2, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                    Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                    Assert.IsNull(fis2.FieldInfo(1));
                    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    Document    d      = new Document();
                    d.Add(new TextField("f1", "d3 first field", Field.Store.YES));
                    d.Add(new TextField("f2", "d3 second field", Field.Store.YES));
                    d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
                    writer.AddDocument(d);
                    writer.Dispose();
                    SegmentInfos sis = new SegmentInfos();
                    sis.Read(dir);
                    Assert.AreEqual(3, sis.Size());
                    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
                    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
                    FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2));
                    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
                    Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
                    Assert.IsNull(fis2.FieldInfo(1));
                    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
                    Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
                    Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
                    Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
                }

                {
                    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
                    writer.DeleteDocuments(new Term("f1", "d1"));
                    // nuke the first segment entirely so that the segment with gaps is
                    // loaded first!
                    writer.ForceMergeDeletes();
                    writer.Dispose();
                }

                IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream()));
                writer_.ForceMerge(1);
                writer_.Dispose();

                SegmentInfos sis_ = new SegmentInfos();
                sis_.Read(dir);
                Assert.AreEqual(1, sis_.Size());
                FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0));
                Assert.AreEqual("f1", fis1_.FieldInfo(0).Name);
                Assert.AreEqual("f2", fis1_.FieldInfo(1).Name);
                Assert.AreEqual("f3", fis1_.FieldInfo(2).Name);
                dir.Dispose();
            }
        }
Example #41
        /// <summary>
        /// For definition of "check point" see IndexWriter comments:
        /// "Clarification: Check Points (and commits)".
        ///
        /// Writer calls this when it has made a "consistent
        /// change" to the index, meaning new files are written to
        /// the index and the in-memory SegmentInfos have been
        /// modified to point to those files.
        ///
        /// this may or may not be a commit (segments_N may or may
        /// not have been written).
        ///
        /// We simply incref the files referenced by the new
        /// SegmentInfos and decref the files we had previously
        /// seen (if any).
        ///
        /// If this is a commit, we also call the policy to give it
        /// a chance to remove other commits.  If any commits are
        /// removed, we decref their files as well.
        /// </summary>
        public void Checkpoint(SegmentInfos segmentInfos, bool isCommit)
        {
            Debug.Assert(Locked());

            //Debug.Assert(Thread.holdsLock(Writer));
            long t0 = 0;

            if (InfoStream.IsEnabled("IFD"))
            {
                t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                InfoStream.Message("IFD", "now checkpoint \"" + Writer.SegString(Writer.ToLiveInfos(segmentInfos).Segments) + "\" [" + segmentInfos.Size() + " segments " + "; isCommit = " + isCommit + "]");
            }

            // Try again now to delete any previously un-deletable
            // files (because they were in use, on Windows):
            DeletePendingFiles();

            // Incref the files:
            IncRef(segmentInfos, isCommit);

            if (isCommit)
            {
                // Append to our commits list:
                Commits.Add(new CommitPoint(CommitsToDelete, Directory, segmentInfos));

                // Tell policy so it can remove commits:
                Policy.OnCommit(Commits);

                // Decref files for commits that were deleted by the policy:
                DeleteCommits();
            }
            else
            {
                // DecRef old files from the last checkpoint, if any:
                DecRef(LastFiles);
                LastFiles.Clear();

                // Save files so we can decr on next checkpoint/commit:
                LastFiles.AddRange(segmentInfos.Files(Directory, false));
            }
            if (InfoStream.IsEnabled("IFD"))
            {
                long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                InfoStream.Message("IFD", ((t1 - t0) / 1000000) + " msec to checkpoint");
            }
        }
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Size();

            IList<SegmentReader> readers = new List<SegmentReader>();
            Directory dir = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int infosUpto = 0;
            bool success = false;
            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    Debug.Assert(info.Info.Dir == dir);
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return result;
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
                        catch (Exception th)
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
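A hedged near-real-time sketch: the internal Open above is what backs DirectoryReader.Open(IndexWriter, applyAllDeletes). Here writer is assumed to be an open IndexWriter like the ones in the tests on this page:

    using (DirectoryReader nrtReader = DirectoryReader.Open(writer, true))  // true = apply pending deletes
    {
        // Fully deleted segments were dropped while cloning the SegmentInfos above,
        // so only segments with live documents are visible.
        Console.WriteLine("docs visible to NRT reader: " + nrtReader.NumDocs);
    }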
        public virtual void TestSingleNonMergeableSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3, true);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the index is left with a single segment.
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
        }
        public virtual void TestMergeFactor()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 3;
            lmp.MergeFactor = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 4 segments in the index, because of the merge factor and
            // max merge docs settings.
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(4, sis.Size());
        }
        public virtual void TestSingleMergeableTooLargeSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 5, true);

            // delete the last document

            writer.DeleteDocuments(new Term("id", "4"));
            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the single too-large segment was left alone and still carries its deletions.
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
            Assert.IsTrue(sis.Info(0).HasDeletions());
        }
        public virtual void TestByteSizeLimit()
        {
            // tests that the max merge size constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);
            const int numSegments = 15;
            for (int i = 0; i < numSegments; i++)
            {
                int numDocs = i == 7 ? 30 : 1;
                AddDocs(writer, numDocs);
            }
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            double min = sis.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
            lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
            sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
 private int GetNumberOfSegments(Directory dir)
 {
     SegmentInfos infos = new SegmentInfos();
     infos.Read(dir);
     return infos.Size();
 }
Example #48
        /// <summary>
        /// For definition of "check point" see IndexWriter comments:
        /// "Clarification: Check Points (and commits)".
        ///
        /// Writer calls this when it has made a "consistent
        /// change" to the index, meaning new files are written to
        /// the index and the in-memory SegmentInfos have been
        /// modified to point to those files.
        ///
        /// this may or may not be a commit (segments_N may or may
        /// not have been written).
        ///
        /// We simply incref the files referenced by the new
        /// SegmentInfos and decref the files we had previously
        /// seen (if any).
        ///
        /// If this is a commit, we also call the policy to give it
        /// a chance to remove other commits.  If any commits are
        /// removed, we decref their files as well.
        /// </summary>
        public void Checkpoint(SegmentInfos segmentInfos, bool isCommit)
        {
            Debug.Assert(Locked());

            //Debug.Assert(Thread.holdsLock(Writer));
            long t0 = 0;
            if (InfoStream.IsEnabled("IFD"))
            {
                t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                InfoStream.Message("IFD", "now checkpoint \"" + Writer.SegString(Writer.ToLiveInfos(segmentInfos).Segments) + "\" [" + segmentInfos.Size() + " segments " + "; isCommit = " + isCommit + "]");
            }

            // Try again now to delete any previously un-deletable
            // files (because they were in use, on Windows):
            DeletePendingFiles();

            // Incref the files:
            IncRef(segmentInfos, isCommit);

            if (isCommit)
            {
                // Append to our commits list:
                Commits.Add(new CommitPoint(CommitsToDelete, Directory, segmentInfos));

                // Tell policy so it can remove commits:
                Policy.OnCommit(Commits);

                // Decref files for commits that were deleted by the policy:
                DeleteCommits();
            }
            else
            {
                // DecRef old files from the last checkpoint, if any:
                DecRef(LastFiles);
                LastFiles.Clear();

                // Save files so we can decr on next checkpoint/commit:
                LastFiles.AddRange(segmentInfos.Files(Directory, false));
            }
            if (InfoStream.IsEnabled("IFD"))
            {
                long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                InfoStream.Message("IFD", ((t1 - t0) / 1000000) + " msec to checkpoint");
            }
        }
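The CommitPoints created in Checkpoint are what a deletion policy later receives as IndexCommits, with SegmentCount filled from SegmentInfos.Size(). A hedged sketch using SnapshotDeletionPolicy; the member names follow the Lucene.NET 4.x API and should be treated as assumptions:

    var policy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    conf.SetIndexDeletionPolicy(policy);             // conf: an IndexWriterConfig as in the tests above
    using (var writer = new IndexWriter(dir, conf))  // dir: any Directory
    {
        writer.Commit();
        IndexCommit snapshot = policy.Snapshot();    // pins this commit so its files are kept
        Console.WriteLine("segments in snapshot: " + snapshot.SegmentCount);
        policy.Release(snapshot);
    }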