Info() public method

Returns the SegmentCommitInfo at the provided index.

public Info ( int i ) : Lucene.Net.Index.SegmentCommitInfo

Parameter: i (int)
Return type: Lucene.Net.Index.SegmentCommitInfo
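A minimal usage sketch (an illustration, not one of the collected examples below): read the current SegmentInfos from a Directory and call Info(i) for each segment. The helper name SegmentLister, the index path parameter, and the exact Read/Open overloads are assumptions based on the 4.x-style API used in the examples that follow, and may need adjusting for other Lucene.NET versions.

using System;
using System.Globalization;
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class SegmentLister
{
    // Prints one line per segment in the latest commit, using SegmentInfos.Info(int).
    public static void PrintSegments(string indexPath) // indexPath is a placeholder
    {
        using (var dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath)))
        {
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir); // load the current segments_N file

            for (int i = 0; i < sis.Count; i++)
            {
                SegmentCommitInfo info = sis.Info(i); // the method documented above
                Console.WriteLine("{0} docs={1} bytes={2}",
                    info.Info.Name,
                    info.Info.DocCount,
                    info.SizeInBytes().ToString(CultureInfo.InvariantCulture));
            }
        }
    }
}

The ListSegments code in Example #2 below follows the same loop over Info(x).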
        protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos)
        {
            lock (this)
            {
                DirectoryIndexReader newReader;

                if (infos.Count == 1)
                {
                    SegmentInfo si = infos.Info(0);
                    if (segment.Equals(si.name) && si.GetUseCompoundFile() == this.si.GetUseCompoundFile())
                    {
                        newReader = ReopenSegment(si);
                    }
                    else
                    {
                        // segment not referenced anymore, reopen not possible
                        // or segment format changed
                        newReader = SegmentReader.Get(readOnly, infos, infos.Info(0), false);
                    }
                }
                else
                {
                    if (readOnly)
                    {
                        return(new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] { this }, null, null));
                    }
                    else
                    {
                        return(new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] { this }, null, null, false));
                    }
                }

                return(newReader);
            }
        }
Example #2
 public virtual void ListSegments()
 {
     for (int x = 0; x < infos.Size(); x++)
     {
         SegmentCommitInfo info    = infos.Info(x);
         string            sizeStr = string.Format(CultureInfo.InvariantCulture, "{0:###,###.###}", info.SizeInBytes());
         Console.WriteLine(info.Info.Name + " " + sizeStr);
     }
 }
        public virtual void TestAddIndexes()
        {
            Directory   dir1   = NewDirectory();
            Directory   dir2   = NewDirectory();
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d1 = new Document();

            d1.Add(new TextField("f1", "first field", Field.Store.YES));
            d1.Add(new TextField("f2", "second field", Field.Store.YES));
            writer.AddDocument(d1);

            writer.Dispose();
            writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document  d2          = new Document();
            FieldType customType2 = new FieldType(TextField.TYPE_STORED);

            customType2.StoreTermVectors = true;
            d2.Add(new TextField("f2", "second field", Field.Store.YES));
            d2.Add(new Field("f1", "first field", customType2));
            d2.Add(new TextField("f3", "third field", Field.Store.YES));
            d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
            writer.AddDocument(d2);

            writer.Dispose();

            writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            writer.AddIndexes(dir2);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir1);
            Assert.AreEqual(2, sis.Count);

            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            // make sure the ordering of the "external" segment is preserved
            Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

            dir1.Dispose();
            dir2.Dispose();
        }
Example #4
			public override System.Object DoBody()
			{
				SegmentInfos infos = new SegmentInfos();
				infos.Read(directory);
				if (infos.Count == 1)
				{
					// index is optimized
					return SegmentReader.Get(infos, infos.Info(0), closeDirectory);
				}
				IndexReader[] readers = new IndexReader[infos.Count];
				for (int i = 0; i < infos.Count; i++)
					readers[i] = SegmentReader.Get(infos.Info(i));
				return new MultiReader(directory, infos, closeDirectory, readers);
			}
Example #5
        internal int docShift;         // total # deleted docs that were compacted by this merge

        public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
        {
            this.docMaps = docMaps;
            SegmentInfo firstSegment = merge.segments.Info(0);
            int         i            = 0;

            while (true)
            {
                SegmentInfo info = infos.Info(i);
                if (info.Equals(firstSegment))
                {
                    break;
                }
                minDocID += info.docCount;
                i++;
            }

            int numDocs = 0;

            for (int j = 0; j < docMaps.Length; i++, j++)
            {
                numDocs += infos.Info(i).docCount;
                System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
            }
            maxDocID = minDocID + numDocs;

            starts    = new int[docMaps.Length];
            newStarts = new int[docMaps.Length];

            starts[0]    = minDocID;
            newStarts[0] = minDocID;
            for (i = 1; i < docMaps.Length; i++)
            {
                int lastDocCount = merge.segments.Info(i - 1).docCount;
                starts[i]    = starts[i - 1] + lastDocCount;
                newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
            }
            docShift = numDocs - mergedDocCount;

            // There are rare cases when docShift is 0.  It happens
            // if you try to delete a docID that's out of bounds,
            // because the SegmentReader still allocates deletedDocs
            // and pretends it has deletions ... so we can't make
            // this assert here
            // assert docShift > 0;

            // Make sure it all adds up:
            System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
        }
            protected internal override object DoBody(System.String segmentFileName)
            {
                SegmentInfos infos = new SegmentInfos();

                infos.Read(directory, segmentFileName);

                DirectoryIndexReader reader;

                if (infos.Count == 1)
                {
                    // index is optimized
                    reader = SegmentReader.Get(readOnly, infos, infos.Info(0), closeDirectory);
                }
                else if (readOnly)
                {
                    reader = new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory);
                }
                else
                {
                    reader = new MultiSegmentReader(directory, infos, closeDirectory, false);
                }
                reader.SetDeletionPolicy(deletionPolicy);

                return(reader);
            }
 protected internal override object DoBody(string segmentFileName)
 {
     var sis = new SegmentInfos();
     sis.Read(directory, segmentFileName);
     var readers = new SegmentReader[sis.Size()];
     for (int i = sis.Size() - 1; i >= 0; i--)
     {
         System.IO.IOException prior = null;
         bool success = false;
         try
         {
             readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
             success = true;
         }
         catch (System.IO.IOException ex)
         {
             prior = ex;
         }
         finally
         {
             if (!success)
             {
                 IOUtils.CloseWhileHandlingException(prior, readers);
             }
         }
     }
     return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
 }
Example #8
        public virtual void TestSingleMergeableSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 5);
            AddDocs(writer, 3);

            // delete the last document, so that the last segment is merged.
            writer.DeleteDocuments(new Term("id", "10"));
            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 3;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the last segment does not have deletions.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(3, sis.Count);
            Assert.IsFalse(sis.Info(2).HasDeletions);
        }
Example #9
            internal virtual String SegString(Directory dir)
            {
                var b           = new System.Text.StringBuilder();
                int numSegments = segments.Count;

                for (int i = 0; i < numSegments; i++)
                {
                    if (i > 0)
                    {
                        b.Append(' ');
                    }
                    b.Append(segments.Info(i).SegString(dir));
                }
                if (info != null)
                {
                    b.Append(" into ").Append(info.name);
                }
                if (optimize)
                {
                    b.Append(" [optimize]");
                }
                if (mergeDocStores)
                {
                    b.Append(" [mergeDocStores]");
                }
                return(b.ToString());
            }
        public virtual void TestByteSizeLimit()
        {
            // tests that the max merge size constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);
            const int numSegments = 15;
            for (int i = 0; i < numSegments; i++)
            {
                int numDocs = i == 7 ? 30 : 1;
                AddDocs(writer, numDocs);
            }
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            double min = sis.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
            lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
            sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
        private int[] starts; // 1st docno for each segment

        #endregion Fields

        #region Constructors

        /// <summary>Construct reading the named set of readers. </summary>
        internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory, bool readOnly)
            : base(directory, sis, closeDirectory, readOnly)
        {
            // To reduce the chance of hitting FileNotFound
            // (and having to retry), we open segments in
            // reverse because IndexWriter merges & deletes
            // the newest segments first.

            SegmentReader[] readers = new SegmentReader[sis.Count];
            for (int i = sis.Count - 1; i >= 0; i--)
            {
                try
                {
                    readers[i] = SegmentReader.Get(readOnly, sis.Info(i));
                }
                catch (System.IO.IOException e)
                {
                    // Close all readers we had opened:
                    for (i++; i < sis.Count; i++)
                    {
                        try
                        {
                            readers[i].Close();
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                    throw e;
                }
            }

            Initialize(readers);
        }
Example #12
        /// <summary>Merges all segments from an array of indexes into this index.
        ///
        /// <p>This may be used to parallelize batch indexing.  A large document
        /// collection can be broken into sub-collections.  Each sub-collection can be
        /// indexed in parallel, on a different thread, process or machine.  The
        /// complete index can then be created by merging sub-collection indexes
        /// with this method.
        ///
        /// <p>After this completes, the index is optimized.
        /// </summary>
        public virtual void  AddIndexes(Directory[] dirs)
        {
            lock (this)
            {
                Optimize();                 // start with zero or 1 seg

                int start = segmentInfos.Count;

                for (int i = 0; i < dirs.Length; i++)
                {
                    SegmentInfos sis = new SegmentInfos();                     // read infos from dir
                    sis.Read(dirs[i]);
                    for (int j = 0; j < sis.Count; j++)
                    {
                        segmentInfos.Add(sis.Info(j));                         // add each info
                    }
                }

                // merge newly added segments in log(n) passes
                while (segmentInfos.Count > start + mergeFactor)
                {
                    for (int base_Renamed = start; base_Renamed < segmentInfos.Count; base_Renamed++)
                    {
                        int end = System.Math.Min(segmentInfos.Count, base_Renamed + mergeFactor);
                        if (end - base_Renamed > 1)
                        {
                            MergeSegments(base_Renamed, end);
                        }
                    }
                }

                Optimize();                 // final cleanup
            }
        }
Example #13
        public virtual void TestSingleMergeableTooLargeSegment()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf   = NewWriterConfig();
            IndexWriter       writer = new IndexWriter(dir, conf);

            AddDocs(writer, 5, true);

            // delete the last document

            writer.DeleteDocuments(new Term("id", "4"));
            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();

            lmp.MaxMergeDocs = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Verify that the last segment does not have deletions.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
            Assert.IsTrue(sis.Info(0).HasDeletions());
        }
            protected internal override object DoBody(string segmentFileName)
            {
                var sis = new SegmentInfos();

                sis.Read(directory, segmentFileName);
                var readers = new SegmentReader[sis.Count];

                for (int i = sis.Count - 1; i >= 0; i--)
                {
                    System.IO.IOException prior = null;
                    bool success = false;
                    try
                    {
                        readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
                        success    = true;
                    }
                    catch (System.IO.IOException ex)
                    {
                        prior = ex;
                    }
                    finally
                    {
                        if (!success)
                        {
                            IOUtils.DisposeWhileHandlingException(prior, readers);
                        }
                    }
                }
                return(new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false));
            }
            protected internal override object DoBody(string segmentFileName)
            {
                var sis = new SegmentInfos();

                sis.Read(directory, segmentFileName);
                var readers = new SegmentReader[sis.Count];

                // LUCENENET: Ported over changes from 4.8.1 to this method
                for (int i = sis.Count - 1; i >= 0; i--)
                {
                    //IOException prior = null; // LUCENENET: Not used
                    bool success = false;
                    try
                    {
                        readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
                        success    = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            IOUtils.DisposeWhileHandlingException(readers);
                        }
                    }
                }
                return(new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false));
            }
        /// <summary>Construct reading the named set of readers. </summary>
        internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory, bool readOnly) : base(directory, sis, closeDirectory, readOnly)
        {
            // To reduce the chance of hitting FileNotFound
            // (and having to retry), we open segments in
            // reverse because IndexWriter merges & deletes
            // the newest segments first.

            SegmentReader[] readers = new SegmentReader[sis.Count];
            for (int i = sis.Count - 1; i >= 0; i--)
            {
                try
                {
                    readers[i] = SegmentReader.Get(readOnly, sis.Info(i));
                }
                catch (System.IO.IOException e)
                {
                    // Close all readers we had opened:
                    for (i++; i < sis.Count; i++)
                    {
                        try
                        {
                            readers[i].Close();
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                    throw e;
                }
            }

            Initialize(readers);
        }
Example #17
        protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
        {
            bool doCFS;

            if (!useCompoundFile)
            {
                doCFS = false;
            }
            else if (noCFSRatio == 1.0)
            {
                doCFS = true;
            }
            else
            {
                long totSize = 0;
                for (int i = 0; i < infos.Count; i++)
                {
                    totSize += Size(infos.Info(i));
                }
                long mergeSize = 0;
                for (int i = 0; i < infosToMerge.Count; i++)
                {
                    mergeSize += Size(infosToMerge.Info(i));
                }

                doCFS = mergeSize <= noCFSRatio * totSize;
            }

            return(new OneMerge(infosToMerge, doCFS));
        }
        public virtual void TestAddIndexes()
        {
            Directory dir1 = NewDirectory();
            Directory dir2 = NewDirectory();
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d1 = new Document();
            d1.Add(new TextField("f1", "first field", Field.Store.YES));
            d1.Add(new TextField("f2", "second field", Field.Store.YES));
            writer.AddDocument(d1);

            writer.Dispose();
            writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            Document d2 = new Document();
            FieldType customType2 = new FieldType(TextField.TYPE_STORED);
            customType2.StoreTermVectors = true;
            d2.Add(new TextField("f2", "second field", Field.Store.YES));
            d2.Add(new Field("f1", "first field", customType2));
            d2.Add(new TextField("f3", "third field", Field.Store.YES));
            d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
            writer.AddDocument(d2);

            writer.Dispose();

            writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            writer.AddIndexes(dir2);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir1);
            Assert.AreEqual(2, sis.Size());

            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            // make sure the ordering of the "external" segment is preserved
            Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

            dir1.Dispose();
            dir2.Dispose();
        }
Example #19
            public override System.Object DoBody()
            {
                SegmentInfos infos = new SegmentInfos();

                infos.Read(directory);
                if (infos.Count == 1)
                {
                    // index is optimized
                    return(SegmentReader.Get(infos, infos.Info(0), closeDirectory));
                }
                IndexReader[] readers = new IndexReader[infos.Count];
                for (int i = 0; i < infos.Count; i++)
                {
                    readers[i] = SegmentReader.Get(infos.Info(i));
                }
                return(new MultiReader(directory, infos, closeDirectory, readers));
            }
		internal int docShift; // total # deleted docs that were compacted by this merge
		
		public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
		{
			this.docMaps = docMaps;
			SegmentInfo firstSegment = merge.segments.Info(0);
			int i = 0;
			while (true)
			{
				SegmentInfo info = infos.Info(i);
				if (info.Equals(firstSegment))
					break;
				minDocID += info.docCount;
				i++;
			}
			
			int numDocs = 0;
			for (int j = 0; j < docMaps.Length; i++, j++)
			{
				numDocs += infos.Info(i).docCount;
				System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
			}
			maxDocID = minDocID + numDocs;
			
			starts = new int[docMaps.Length];
			newStarts = new int[docMaps.Length];
			
			starts[0] = minDocID;
			newStarts[0] = minDocID;
			for (i = 1; i < docMaps.Length; i++)
			{
				int lastDocCount = merge.segments.Info(i - 1).docCount;
				starts[i] = starts[i - 1] + lastDocCount;
				newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
			}
			docShift = numDocs - mergedDocCount;
			
			// There are rare cases when docShift is 0.  It happens
			// if you try to delete a docID that's out of bounds,
			// because the SegmentReader still allocates deletedDocs
			// and pretends it has deletions ... so we can't make
			// this assert here
			// assert docShift > 0;
			
			// Make sure it all adds up:
			System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
		}
Example #21
        /// <summary>
        /// Returns the merges necessary to merge the index, taking the max merge
        /// size or max merge docs into consideration. this method attempts to respect
        /// the <paramref name="maxNumSegments"/> parameter, however it might be, due to size
        /// constraints, that more than that number of segments will remain in the
        /// index. Also, this method does not guarantee that exactly
        /// <paramref name="maxNumSegments"/> will remain, but &lt;= that number.
        /// </summary>
        private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last)
        {
            MergeSpecification        spec     = new MergeSpecification();
            IList <SegmentCommitInfo> segments = infos.AsList();

            int start = last - 1;

            while (start >= 0)
            {
                SegmentCommitInfo info = infos.Info(start);
                if (Size(info) > m_maxMergeSizeForForcedMerge || SizeDocs(info) > m_maxMergeDocs)
                {
                    if (IsVerbose)
                    {
                        Message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + m_maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + m_maxMergeDocs + ")");
                    }
                    // need to skip that segment + add a merge for the 'right' segments,
                    // unless there is only 1 which is merged.
                    if (last - start - 1 > 1 || (start != last - 1 && !IsMerged(infos, infos.Info(start + 1))))
                    {
                        // there is more than 1 segment to the right of
                        // this one, or a mergeable single segment.
                        spec.Add(new OneMerge(segments.SubList(start + 1, last)));
                    }
                    last = start;
                }
                else if (last - start == m_mergeFactor)
                {
                    // mergeFactor eligible segments were found, add them as a merge.
                    spec.Add(new OneMerge(segments.SubList(start, last)));
                    last = start;
                }
                --start;
            }

            // Add any left-over segments, unless there is just 1
            // already fully merged
            if (last > 0 && (++start + 1 < last || !IsMerged(infos, infos.Info(start))))
            {
                spec.Add(new OneMerge(segments.SubList(start, last)));
            }

            return(spec.Merges.Count == 0 ? null : spec);
        }
            internal ReaderCommit(SegmentInfos infos, Directory dir)
            {
                segmentsFileName = infos.GetCurrentSegmentFileName();
                this.dir         = dir;
                int size = infos.Count;

                files = new List <string>(size);
                files.Add(segmentsFileName);
                for (int i = 0; i < size; i++)
                {
                    SegmentInfo info = infos.Info(i);
                    if (info.dir == dir)
                    {
                        SupportClass.CollectionsSupport.AddAll(info.Files(), files);
                    }
                }
                version     = infos.GetVersion();
                generation  = infos.GetGeneration();
                isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
            }
Example #23
			public override System.Object DoBody(System.String segmentFileName)
			{
				
				SegmentInfos infos = new SegmentInfos();
				infos.Read(directory, segmentFileName);
				
				if (infos.Count == 1)
				{
					// index is optimized
					return SegmentReader.Get(infos, infos.Info(0), closeDirectory);
				}
				else
				{
					
					// To reduce the chance of hitting FileNotFound
					// (and having to retry), we open segments in
					// reverse because IndexWriter merges & deletes
					// the newest segments first.
					
					IndexReader[] readers = new IndexReader[infos.Count];
					for (int i = infos.Count - 1; i >= 0; i--)
					{
						try
						{
							readers[i] = SegmentReader.Get(infos.Info(i));
						}
						catch (System.IO.IOException e)
						{
							// Close all readers we had opened:
							for (i++; i < infos.Count; i++)
							{
								readers[i].Close();
							}
							throw e;
						}
					}
					
					return new MultiReader(directory, infos, closeDirectory, readers);
				}
			}
Example #24
        internal void  DecRef(SegmentInfos segmentInfos)
        {
            int size = segmentInfos.Count;

            for (int i = 0; i < size; i++)
            {
                SegmentInfo segmentInfo = segmentInfos.Info(i);
                if (segmentInfo.dir == directory)
                {
                    DecRef(segmentInfo.Files());
                }
            }
        }
Example #25
            public override System.Object DoBody(System.String segmentFileName)
            {
                SegmentInfos infos = new SegmentInfos();

                infos.Read(directory, segmentFileName);

                if (infos.Count == 1)
                {
                    // index is optimized
                    return(SegmentReader.Get(infos, infos.Info(0), closeDirectory));
                }
                else
                {
                    // To reduce the chance of hitting FileNotFound
                    // (and having to retry), we open segments in
                    // reverse because IndexWriter merges & deletes
                    // the newest segments first.

                    IndexReader[] readers = new IndexReader[infos.Count];
                    for (int i = infos.Count - 1; i >= 0; i--)
                    {
                        try
                        {
                            readers[i] = SegmentReader.Get(infos.Info(i));
                        }
                        catch (System.IO.IOException e)
                        {
                            // Close all readers we had opened:
                            for (i++; i < infos.Count; i++)
                            {
                                readers[i].Close();
                            }
                            throw e;
                        }
                    }

                    return(new MultiReader(directory, infos, closeDirectory, readers));
                }
            }
            public CommitPoint(IndexFileDeleter enclosingInstance, ICollection <IndexCommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos)
            {
                InitBlock(enclosingInstance);
                this.directory       = directory;
                this.commitsToDelete = commitsToDelete;
                segmentsFileName     = segmentInfos.GetCurrentSegmentFileName();
                version    = segmentInfos.GetVersion();
                generation = segmentInfos.GetGeneration();
                int size = segmentInfos.Count;

                files = new List <string>(size);
                files.Add(segmentsFileName);
                gen = segmentInfos.GetGeneration();
                for (int i = 0; i < size; i++)
                {
                    SegmentInfo segmentInfo = segmentInfos.Info(i);
                    if (segmentInfo.dir == Enclosing_Instance.directory)
                    {
                        SupportClass.CollectionsSupport.AddAll(segmentInfo.Files(), files);
                    }
                }
                isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();
            }
        /// <summary>
        /// Finds merges necessary to expunge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos, IndexWriter writer)
        {
            this.writer = writer;

            int numSegments = segmentInfos.Count;

            Message("findMergesToExpungeDeletes: " + numSegments + " segments");

            MergeSpecification spec       = new MergeSpecification();
            int firstSegmentWithDeletions = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = segmentInfos.Info(i);
                if (info.HasDeletions())
                {
                    Message("  segment " + info.name + " has deletions");
                    if (firstSegmentWithDeletions == -1)
                    {
                        firstSegmentWithDeletions = i;
                    }
                    else if (i - firstSegmentWithDeletions == mergeFactor)
                    {
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                        firstSegmentWithDeletions = i;
                    }
                }
                else if (firstSegmentWithDeletions != -1)
                {
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                    firstSegmentWithDeletions = -1;
                }
            }

            if (firstSegmentWithDeletions != -1)
            {
                Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, numSegments), useCompoundFile));
            }

            return(spec);
        }
Example #28
 /// <summary>Returns the number of documents currently in this index. </summary>
 public virtual int DocCount()
 {
     lock (this)
     {
         int count = 0;
         for (int i = 0; i < segmentInfos.Count; i++)
         {
             SegmentInfo si = segmentInfos.Info(i);
             count += si.docCount;
         }
         return(count);
     }
 }
Example #29
            public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos)
            {
                InitBlock(enclosingInstance);
                this.directory       = directory;
                this.commitsToDelete = commitsToDelete;
                userData             = segmentInfos.GetUserData();
                segmentsFileName     = segmentInfos.GetCurrentSegmentFileName();
                version     = segmentInfos.GetVersion();
                generation  = segmentInfos.GetGeneration();
                files       = segmentInfos.Files(directory, true);
                gen         = segmentInfos.GetGeneration();
                isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();

                System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
            }
Example #30
 protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos)
 {
     lock (this)
     {
         if (infos.Count == 1)
         {
             // The index has only one segment now, so we can't refresh the MultiSegmentReader.
             // Return a new SegmentReader instead
             SegmentReader newReader = SegmentReader.Get(infos, infos.Info(0), false);
             return(newReader);
         }
         else
         {
             return(new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache));
         }
     }
 }
Example #31
        /// <summary> Rolls back state to just before the commit (this is
        /// called by commit() if there is some exception while
        /// committing).
        /// </summary>
        internal virtual void  RollbackCommit()
        {
            if (directoryOwner)
            {
                for (int i = 0; i < segmentInfos.Count; i++)
                {
                    // Rollback each segmentInfo.  Because the
                    // SegmentReader holds a reference to the
                    // SegmentInfo we can't [easily] just replace
                    // segmentInfos, so we reset it in place instead:
                    segmentInfos.Info(i).Reset(rollbackSegmentInfos.Info(i));
                }
                rollbackSegmentInfos = null;
            }

            hasChanges = rollbackHasChanges;
        }
        private bool IsOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Dictionary <SegmentInfo, SegmentInfo> segmentsToOptimize)
        {
            int         numSegments   = infos.Count;
            int         numToOptimize = 0;
            SegmentInfo optimizeInfo  = null;

            for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                if (segmentsToOptimize.ContainsKey(info))
                {
                    numToOptimize++;
                    optimizeInfo = info;
                }
            }

            return(numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(writer, optimizeInfo)));
        }
Example #33
        private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet <SegmentInfo> segmentsToOptimize, IState state)
        {
            int         numSegments   = infos.Count;
            int         numToOptimize = 0;
            SegmentInfo optimizeInfo  = null;

            for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                if (segmentsToOptimize.Contains(info))
                {
                    numToOptimize++;
                    optimizeInfo = info;
                }
            }

            return(numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo, state)));
        }
Example #34
        private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            int         numSegments   = infos.Count;
            int         numToOptimize = 0;
            SegmentInfo optimizeInfo  = null;

            for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                if (segmentsToOptimize.Contains(info))
                {
                    numToOptimize++;
                    optimizeInfo = info;
                }
            }

            return(numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)));
        }
Example #35
        internal void  IncRef(SegmentInfos segmentInfos, bool isCommit)
        {
            int size = segmentInfos.Count;

            for (int i = 0; i < size; i++)
            {
                SegmentInfo segmentInfo = segmentInfos.Info(i);
                if (segmentInfo.dir == directory)
                {
                    IncRef(segmentInfo.Files());
                }
            }

            if (isCommit)
            {
                // Since this is a commit point, also incref its
                // segments_N file:
                GetRefCount(segmentInfos.GetCurrentSegmentFileName()).IncRef();
            }
        }
			protected internal override System.Object DoBody(System.String segmentFileName)
			{
				
				SegmentInfos infos = new SegmentInfos();
				infos.Read(directory, segmentFileName);
				
				DirectoryIndexReader reader;
				
				if (infos.Count == 1)
				{
					// index is optimized
					reader = SegmentReader.Get(infos, infos.Info(0), closeDirectory);
				}
				else
				{
					reader = new MultiSegmentReader(directory, infos, closeDirectory);
				}
				reader.SetDeletionPolicy(deletionPolicy);
				return reader;
			}
        internal virtual void Init(Directory directory, SegmentInfos segmentInfos, bool closeDirectory, bool readOnly)
        {
            this.directory      = directory;
            this.segmentInfos   = segmentInfos;
            this.closeDirectory = closeDirectory;
            this.readOnly       = readOnly;

            if (!readOnly && segmentInfos != null)
            {
                // we assume that this segments_N was properly sync'd prior
                for (int i = 0; i < segmentInfos.Count; i++)
                {
                    SegmentInfo    info  = segmentInfos.Info(i);
                    IList <string> files = info.Files();
                    for (int j = 0; j < files.Count; j++)
                    {
                        synced[files[j]] = files[j];
                    }
                }
            }
        }
Example #38
        /// <summary>
        /// Returns <c>true</c> if the number of segments eligible for
        /// merging is less than or equal to the specified
        /// <paramref name="maxNumSegments"/>.
        /// </summary>
        protected virtual bool IsMerged(SegmentInfos infos, int maxNumSegments, IDictionary <SegmentCommitInfo, bool?> segmentsToMerge)
        {
            int numSegments             = infos.Count;
            int numToMerge              = 0;
            SegmentCommitInfo mergeInfo = null;
            bool segmentIsOriginal      = false;

            for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                bool? isOriginal;
                segmentsToMerge.TryGetValue(info, out isOriginal);
                if (isOriginal != null)
                {
                    segmentIsOriginal = isOriginal.Value;
                    numToMerge++;
                    mergeInfo = info;
                }
            }

            return(numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || IsMerged(infos, mergeInfo)));
        }
Example #39
            public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos)
            {
                InitBlock(enclosingInstance);
                this.directory = directory;
                this.commitsToDelete = commitsToDelete;
                userData = segmentInfos.GetUserData();
                segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
                version = segmentInfos.GetVersion();
                generation = segmentInfos.GetGeneration();
                files = segmentInfos.Files(directory, true);
                gen = segmentInfos.GetGeneration();
                isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();

                System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
            }
Example #40
 /// <summary>This constructor is only used for <see cref="Reopen()" /> </summary>
 internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
                          IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
 {
     this.internalDirectory = directory;
     this.readOnly = readOnly;
     this.segmentInfos = infos;
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(infos.Files(directory, true));
     }
     
     // we put the old SegmentReaders in a map, that allows us
     // to lookup a reader using its segment name
     IDictionary<string, int> segmentReaders = new HashMap<string, int>();
     
     if (oldReaders != null)
     {
         // create a Map SegmentName->SegmentReader
         for (int i = 0; i < oldReaders.Length; i++)
         {
             segmentReaders[oldReaders[i].SegmentName] = i;
         }
     }
     
     var newReaders = new SegmentReader[infos.Count];
     
     // remember which readers are shared between the old and the re-opened
     // DirectoryReader - we have to incRef those readers
     var readerShared = new bool[infos.Count];
     
     for (int i = infos.Count - 1; i >= 0; i--)
     {
         // find SegmentReader for this segment
         if (!segmentReaders.ContainsKey(infos.Info(i).name))
         {
             // this is a new segment, no old SegmentReader can be reused
             newReaders[i] = null;
         }
         else
         {
             // there is an old reader for this segment - we'll try to reopen it
             newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
         }
         
         bool success = false;
         try
         {
             SegmentReader newReader;
             if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
             {
                 
                 // We should never see a totally new segment during cloning
                 System.Diagnostics.Debug.Assert(!doClone);
                 
                 // this is a new reader; in case we hit an exception we can close it safely
                 newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
             }
             else
             {
                 newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
             }
             if (newReader == newReaders[i])
             {
                 // this reader will be shared between the old and the new one,
                 // so we must incRef it
                 readerShared[i] = true;
                 newReader.IncRef();
             }
             else
             {
                 readerShared[i] = false;
                 newReaders[i] = newReader;
             }
             success = true;
         }
         finally
         {
             if (!success)
             {
                 for (i++; i < infos.Count; i++)
                 {
                     if (newReaders[i] != null)
                     {
                         try
                         {
                             if (!readerShared[i])
                             {
                                 // this is a new subReader that is not used by the old one,
                                 // we can close it
                                 newReaders[i].Close();
                             }
                             else
                             {
                                 // this subReader is also used by the old reader, so instead
                                 // closing we must decRef it
                                 newReaders[i].DecRef();
                             }
                         }
                         catch (System.IO.IOException)
                         {
                             // keep going - we want to clean up as much as possible
                         }
                     }
                 }
             }
         }
     }
     
     // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
     Initialize(newReaders);
     
     // try to copy unchanged norms from the old normsCache to the new one
     if (oldNormsCache != null)
     {
         foreach(var entry in oldNormsCache)
         {
             String field = entry.Key;
             if (!HasNorms(field))
             {
                 continue;
             }
             
             byte[] oldBytes = entry.Value;
             
             var bytes = new byte[MaxDoc];
             
             for (int i = 0; i < subReaders.Length; i++)
             {
                 int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];
                 
                 // this SegmentReader was not re-opened, we can copy all of its norms 
                 if (segmentReaders.ContainsKey(subReaders[i].SegmentName) &&
                      (oldReaders[oldReaderIndex] == subReaders[i]
                        || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
                 {
                     // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                     // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                     // which is synchronized
                     Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                 }
                 else
                 {
                     subReaders[i].Norms(field, bytes, starts[i]);
                 }
             }
             
             normsCache[field] = bytes; // update cache
         }
     }
 }
		internal bool ApplyDeletes(SegmentInfos infos)
		{
			lock (this)
			{
				
				if (!HasDeletes())
					return false;
				
				if (infoStream != null)
					Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
				
				int infosEnd = infos.Count;
				
				int docStart = 0;
				bool any = false;
				for (int i = 0; i < infosEnd; i++)
				{
					
					// Make sure we never attempt to apply deletes to
					// segment in external dir
					System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
					
					SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
					try
					{
						any |= ApplyDeletes(reader, docStart);
						docStart += reader.MaxDoc();
					}
					finally
					{
						writer.readerPool.Release(reader);
					}
				}
				
				deletesFlushed.Clear();
				
				return any;
			}
		}
Example #42
        /// <summary>
        /// Returns a <seealso cref="Status"/> instance detailing
        ///  the state of the index.
        /// </summary>
        ///  <param name="onlySegments"> list of specific segment names to check
        ///
        ///  <p>As this method checks every byte in the specified
        ///  segments, on a large index it can take quite a long
        ///  time to run.
        ///
        ///  <p><b>WARNING</b>: make sure
        ///  you only call this when the index is not opened by any
        ///  writer.  </param>
        public virtual Status DoCheckIndex(IList<string> onlySegments)
        {
            NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();
            Status result = new Status();
            result.Dir = Dir;
            try
            {
                sis.Read(Dir);
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not read any segments file in directory");
                result.MissingSegments = true;
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                    //infoStream.WriteLine(t.StackTrace);
                }
                return result;
            }

            // find the oldest and newest segment versions
            string oldest = Convert.ToString(int.MaxValue), newest = Convert.ToString(int.MinValue);
            string oldSegs = null;
            bool foundNonNullVersion = false;
            IComparer<string> versionComparator = StringHelper.VersionComparator;
            foreach (SegmentCommitInfo si in sis.Segments)
            {
                string version = si.Info.Version;
                if (version == null)
                {
                    // pre-3.1 segment
                    oldSegs = "pre-3.1";
                }
                else
                {
                    foundNonNullVersion = true;
                    if (versionComparator.Compare(version, oldest) < 0)
                    {
                        oldest = version;
                    }
                    if (versionComparator.Compare(version, newest) > 0)
                    {
                        newest = version;
                    }
                }
            }

            int numSegments = sis.Size();
            string segmentsFileName = sis.SegmentsFileName;
            // note: we only read the format byte (required preamble) here!
            IndexInput input = null;
            try
            {
                input = Dir.OpenInput(segmentsFileName, IOContext.READONCE);
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                    //infoStream.WriteLine(t.StackTrace);
                }
                result.CantOpenSegments = true;
                return result;
            }
            int format = 0;
            try
            {
                format = input.ReadInt();
            }
            catch (Exception t)
            {
                Msg(infoStream, "ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    // LUCENENET NOTE: Some tests rely on the error type being in
                    // the message. We can't get the error type with StackTrace, we
                    // need ToString() for that.
                    infoStream.WriteLine(t.ToString());
                    //infoStream.WriteLine(t.StackTrace);
                }
                result.MissingSegmentVersion = true;
                return result;
            }
            finally
            {
                if (input != null)
                {
                    input.Dispose();
                }
            }

            string sFormat = "";
            bool skip = false;

            result.SegmentsFileName = segmentsFileName;
            result.NumSegments = numSegments;
            result.UserData = sis.UserData;
            string userDataString;
            if (sis.UserData.Count > 0)
            {
                userDataString = " userData=" + sis.UserData;
            }
            else
            {
                userDataString = "";
            }

            string versionString = null;
            if (oldSegs != null)
            {
                if (foundNonNullVersion)
                {
                    versionString = "versions=[" + oldSegs + " .. " + newest + "]";
                }
                else
                {
                    versionString = "version=" + oldSegs;
                }
            }
            else
            {
                versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
            }

            Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.Partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                    foreach (string s in onlySegments)
                    {
                        infoStream.Write(" " + s);
                    }
                }
                result.SegmentsChecked.AddRange(onlySegments);
                Msg(infoStream, ":");
            }

            if (skip)
            {
                Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.ToolOutOfDate = true;
                return result;
            }

            result.NewSegments = (SegmentInfos)sis.Clone();
            result.NewSegments.Clear();
            result.MaxSegmentName = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentCommitInfo info = sis.Info(i);
                int segmentName = 0;
                try
                {
                    segmentName = int.Parse /*Convert.ToInt32*/(info.Info.Name.Substring(1));
                }
                catch
                {
                }
                if (segmentName > result.MaxSegmentName)
                {
                    result.MaxSegmentName = segmentName;
                }
                if (onlySegments != null && !onlySegments.Contains(info.Info.Name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.SegmentInfos.Add(segInfoStat);
                Msg(infoStream, "  " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount);
                segInfoStat.Name = info.Info.Name;
                segInfoStat.DocCount = info.Info.DocCount;

                string version = info.Info.Version;
                if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0)
                {
                    throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount);
                }

                int toLoseDocCount = info.Info.DocCount;

                AtomicReader reader = null;

                try
                {
                    Codec codec = info.Info.Codec;
                    Msg(infoStream, "    codec=" + codec);
                    segInfoStat.Codec = codec;
                    Msg(infoStream, "    compound=" + info.Info.UseCompoundFile);
                    segInfoStat.Compound = info.Info.UseCompoundFile;
                    Msg(infoStream, "    numFiles=" + info.Files().Count);
                    segInfoStat.NumFiles = info.Files().Count;
                    segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null)
                    {
                        // don't print size in bytes if it's a 3.0 segment with shared docstores
                        Msg(infoStream, "    size (MB)=" + segInfoStat.SizeMB.ToString(nf));
                    }
                    IDictionary<string, string> diagnostics = info.Info.Diagnostics;
                    segInfoStat.Diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg(infoStream, "    diagnostics = " + diagnostics);
                    }

                    if (!info.HasDeletions())
                    {
                        Msg(infoStream, "    no deletions");
                        segInfoStat.HasDeletions = false;
                    }
                    else
                    {
                        Msg(infoStream, "    has deletions [delGen=" + info.DelGen + "]");
                        segInfoStat.HasDeletions = true;
                        segInfoStat.DeletionsGen = info.DelGen;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
                    Msg(infoStream, "OK");

                    segInfoStat.OpenReaderPassed = true;

                    if (infoStream != null)
                    {
                        infoStream.Write("    test: check integrity.....");
                    }
                    reader.CheckIntegrity();
                    Msg(infoStream, "OK");

                    if (infoStream != null)
                    {
                        infoStream.Write("    test: check live docs.....");
                    }
                    int numDocs = reader.NumDocs;
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions)
                    {
                        if (reader.NumDocs != info.Info.DocCount - info.DelCount)
                        {
                            throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs);
                        }
                        if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc)
                        {
                            throw new Exception("too many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs));
                        }
                        if (info.Info.DocCount - numDocs != info.DelCount)
                        {
                            throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                        }
                        Bits liveDocs = reader.LiveDocs;
                        if (liveDocs == null)
                        {
                            throw new Exception("segment should have deletions, but liveDocs is null");
                        }
                        else
                        {
                            int numLive = 0;
                            for (int j = 0; j < liveDocs.Length(); j++)
                            {
                                if (liveDocs.Get(j))
                                {
                                    numLive++;
                                }
                            }
                            if (numLive != numDocs)
                            {
                                throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
                            }
                        }

                        segInfoStat.NumDeleted = info.Info.DocCount - numDocs;
                        Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.DelCount != 0)
                        {
                            throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                        }
                        Bits liveDocs = reader.LiveDocs;
                        if (liveDocs != null)
                        {
                            // it's ok for liveDocs to be non-null here, as long as every doc is still marked live
                            for (int j = 0; j < liveDocs.Length(); j++)
                            {
                                if (!liveDocs.Get(j))
                                {
                                    throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
                                }
                            }
                        }
                        Msg(infoStream, "OK");
                    }
                    if (reader.MaxDoc != info.Info.DocCount)
                    {
                        throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount);
                    }

                    // Test getFieldInfos()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    FieldInfos fieldInfos = reader.FieldInfos;
                    Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]");
                    segInfoStat.NumFields = fieldInfos.Size();

                    // Test Field Norms
                    segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream);

                    // Test the Term Index
                    segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose);

                    // Test Stored Fields
                    segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream);

                    // Test Term Vectors
                    segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed);

                    segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream);

                    // Rethrow the first exception we encountered
                    //  this will cause stats for failed segments to be incremented properly
                    if (segInfoStat.FieldNormStatus.Error != null)
                    {
                        throw new Exception("Field Norm test failed");
                    }
                    else if (segInfoStat.TermIndexStatus.Error != null)
                    {
                        throw new Exception("Term Index test failed");
                    }
                    else if (segInfoStat.StoredFieldStatus.Error != null)
                    {
                        throw new Exception("Stored Field test failed");
                    }
                    else if (segInfoStat.TermVectorStatus.Error != null)
                    {
                        throw new Exception("Term Vector test failed");
                    }
                    else if (segInfoStat.DocValuesStatus.Error != null)
                    {
                        throw new Exception("DocValues test failed");
                    }

                    Msg(infoStream, "");
                }
                catch (Exception t)
                {
                    Msg(infoStream, "FAILED");
                    string comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg(infoStream, "    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        // LUCENENET NOTE: Some tests rely on the error type being in
                        // the message. We can't get the error type with StackTrace, we
                        // need ToString() for that.
                        infoStream.WriteLine(t.ToString());
                        //infoStream.WriteLine(t.StackTrace);
                    }
                    Msg(infoStream, "");
                    result.TotLoseDocCount += toLoseDocCount;
                    result.NumBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Dispose();
                    }
                }

                // Keeper
                result.NewSegments.Add((SegmentCommitInfo)info.Clone());
            }

            if (0 == result.NumBadSegments)
            {
                result.Clean = true;
            }
            else
            {
                Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected");
            }

            if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter)))
            {
                result.Clean = false;
                result.NewSegments.Counter = result.MaxSegmentName + 1;
                Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName);
            }

            if (result.Clean)
            {
                Msg(infoStream, "No problems were detected with this index.\n");
            }

            return result;
        }
Example #43
0
		/// <summary> Merges all segments from an array of indexes into this
		/// index.
		/// 
		/// <p/>This may be used to parallelize batch indexing.  A large document
		/// collection can be broken into sub-collections.  Each sub-collection can be
		/// indexed in parallel, on a different thread, process or machine.  The
		/// complete index can then be created by merging sub-collection indexes
		/// with this method.
		/// 
		/// <p/><b>NOTE:</b> the index in each Directory must not be
		/// changed (opened by a writer) while this method is
		/// running.  This method does not acquire a write lock in
		/// each input Directory, so it is up to the caller to
		/// enforce this.
		/// 
		/// <p/><b>NOTE:</b> while this is running, any attempts to
		/// add or delete documents (with another thread) will be
		/// paused until this method completes.
		/// 
		/// <p/>This method is transactional in how Exceptions are
		/// handled: it does not commit a new segments_N file until
		/// all indexes are added.  This means if an Exception
		/// occurs (for example disk full), then either no indexes
		/// will have been added or they all will have been.<p/>
		/// 
		/// <p/>Note that this requires temporary free space in the
		/// Directory up to 2X the sum of all input indexes
		/// (including the starting index).  If readers/searchers
		/// are open against the starting index, then temporary
		/// free space required will be higher by the size of the
		/// starting index (see {@link #Optimize()} for details).
		/// <p/>
		/// 
		/// <p/>Once this completes, the final size of the index
		/// will be less than the sum of all input index sizes
		/// (including the starting index).  It could be quite a
		/// bit smaller (if there were many pending deletes) or
		/// just slightly smaller.<p/>
		/// 
		/// <p/>
		/// This requires this index not be among those to be added.
		/// 
		/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
		/// you should immediately close the writer.  See <a
		/// href="#OOME">above</a> for details.<p/>
		/// 
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  AddIndexesNoOptimize(Directory[] dirs)
		{
			
			EnsureOpen();
			
			NoDupDirs(dirs);
			
			// Do not allow add docs or deletes while we are running:
			docWriter.PauseAllThreads();
			
			try
			{
				if (infoStream != null)
					Message("flush at addIndexesNoOptimize");
				Flush(true, false, true);
				
				bool success = false;
				
				StartTransaction(false);
				
				try
				{
					
					int docCount = 0;
					lock (this)
					{
						EnsureOpen();
						
						for (int i = 0; i < dirs.Length; i++)
						{
							if (directory == dirs[i])
							{
								// cannot add this index: segments may be deleted in merge before added
								throw new System.ArgumentException("Cannot add this index to itself");
							}
							
							SegmentInfos sis = new SegmentInfos(); // read infos from dir
							sis.Read(dirs[i]);
							for (int j = 0; j < sis.Count; j++)
							{
								SegmentInfo info = sis.Info(j);
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
								docCount += info.docCount;
								segmentInfos.Add(info); // add each info
							}
						}
					}
					
					// Notify DocumentsWriter that the flushed count just increased
					docWriter.UpdateFlushedDocCount(docCount);
					
					MaybeMerge();
					
					EnsureOpen();
					
					// If after merging there remain segments in the index
					// that are in a different directory, just copy these
					// over into our index.  This is necessary (before
					// finishing the transaction) to avoid leaving the
					// index in an unusable (inconsistent) state.
					ResolveExternalSegments();
					
					EnsureOpen();
					
					success = true;
				}
				finally
				{
					if (success)
					{
						CommitTransaction();
					}
					else
					{
						RollbackTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexesNoOptimize");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
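A minimal caller sketch for the method above (a sketch only, assuming the 2.9-style Lucene.Net API shown here; the directory paths and analyzer choice are hypothetical, and each sub-index is assumed to have been built by its own IndexWriter elsewhere):

        // needs: using Lucene.Net.Analysis; using Lucene.Net.Index; using Lucene.Net.Store;
        Directory mainDir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/main"));
        Directory partA = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/partA"));
        Directory partB = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/partB"));

        IndexWriter writer = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            // Transactional: either every segment of partA and partB is added, or none is.
            writer.AddIndexesNoOptimize(new Directory[] { partA, partB });
        }
        finally
        {
            writer.Close();
        }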
        /// <summary>This constructor is only used for {@link #Reopen()} </summary>
        internal MultiSegmentReader(Directory directory, SegmentInfos infos, bool closeDirectory, SegmentReader[] oldReaders, int[] oldStarts, Dictionary<string, byte[]> oldNormsCache, bool readOnly)
            : base(directory, infos, closeDirectory, readOnly)
        {
            // we put the old SegmentReaders in a map that allows us
            // to look up a reader by its segment name
            Dictionary<string, int> segmentReaders = new Dictionary<string, int>();

            if (oldReaders != null)
            {
                // create a Map SegmentName->SegmentReader
                for (int i = 0; i < oldReaders.Length; i++)
                {
                    segmentReaders[oldReaders[i].GetSegmentName()] = i;
                }
            }

            SegmentReader[] newReaders = new SegmentReader[infos.Count];

            // remember which readers are shared between the old and the re-opened
            // MultiSegmentReader - we have to incRef those readers
            bool[] readerShared = new bool[infos.Count];

            for (int i = infos.Count - 1; i >= 0; i--)
            {
                // find SegmentReader for this segment
                int oldReaderIndex;
                if (!segmentReaders.ContainsKey(infos.Info(i).name))
                {
                    // this is a new segment, no old SegmentReader can be reused
                    newReaders[i] = null;
                }
                else
                {
                    oldReaderIndex = segmentReaders[infos.Info(i).name];
                    // there is an old reader for this segment - we'll try to reopen it
                    newReaders[i] = oldReaders[oldReaderIndex];
                }

                bool success = false;
                try
                {
                    SegmentReader newReader;
                    if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile())
                    {
                        // this is a new reader; in case we hit an exception we can close it safely
                        newReader = SegmentReader.Get(readOnly, infos.Info(i));
                    }
                    else
                    {
                        newReader = (SegmentReader) newReaders[i].ReopenSegment(infos.Info(i));
                    }
                    if (newReader == newReaders[i])
                    {
                        // this reader will be shared between the old and the new one,
                        // so we must incRef it
                        readerShared[i] = true;
                        newReader.IncRef();
                    }
                    else
                    {
                        readerShared[i] = false;
                        newReaders[i] = newReader;
                    }
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        for (i++; i < infos.Count; i++)
                        {
                            if (newReaders[i] != null)
                            {
                                try
                                {
                                    if (!readerShared[i])
                                    {
                                        // this is a new subReader that is not used by the old one,
                                        // we can close it
                                        newReaders[i].Close();
                                    }
                                    else
                                    {
                                        // this subReader is also used by the old reader, so instead
                                        // closing we must decRef it
                                        newReaders[i].DecRef();
                                    }
                                }
                                catch (System.IO.IOException)
                                {
                                    // keep going - we want to clean up as much as possible
                                }
                            }
                        }
                    }
                }
            }

            // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
            Initialize(newReaders);

            // try to copy unchanged norms from the old normsCache to the new one
            if (oldNormsCache != null)
            {
                IEnumerator<KeyValuePair<string, byte[]>> it = oldNormsCache.GetEnumerator();
                while (it.MoveNext())
                {
                    KeyValuePair<string, byte[]> entry = it.Current;
                    string field = entry.Key;
                    if (!HasNorms(field))
                    {
                        continue;
                    }

                    byte[] oldBytes = entry.Value;
                    byte[] bytes = new byte[MaxDoc()];

                    for (int i = 0; i < subReaders.Length; i++)
                    {
                        if (segmentReaders.ContainsKey(subReaders[i].GetSegmentName()))
                        {
                            int oldReaderIndex = segmentReaders[subReaders[i].GetSegmentName()];
                            // this SegmentReader was not re-opened, we can copy all of its norms
                            if (oldReaders[oldReaderIndex] == subReaders[i] || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field])
                            {
                                // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                                // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                                // which is synchronized
                                System.Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                            }
                            else
                            {
                                subReaders[i].Norms(field, bytes, starts[i]);
                            }
                        }
                        else
                        {
                            subReaders[i].Norms(field, bytes, starts[i]);
                        }
                    }

                    normsCache[field] = bytes; // update cache
                }
            }
        }
Example #45
0
 internal ReaderCommit(SegmentInfos infos, Directory dir)
 {
     segmentsFileName = infos.GetCurrentSegmentFileName();
     this.dir = dir;
     userData = infos.UserData;
     files = infos.Files(dir, true);
     version = infos.Version;
     generation = infos.Generation;
     isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
 }
Example #46
0
        protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
        {
            bool doCFS;
            if (!useCompoundFile)
            {
                doCFS = false;
            }
            else if (noCFSRatio == 1.0)
            {
                doCFS = true;
            }
            else
            {
                long totSize = 0;
                for (int i = 0; i < infos.Count; i++)
                {
                    totSize += Size(infos.Info(i));
                }
                long mergeSize = 0;
                for (int i = 0; i < infosToMerge.Count; i++)
                {
                    mergeSize += Size(infosToMerge.Info(i));
                }

                doCFS = mergeSize <= noCFSRatio * totSize;
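                // Example with hypothetical sizes: with noCFSRatio = 0.1 and a total
                // index size of 1 GB, only merges whose input segments sum to <= ~100 MB
                // are written as compound files; larger merges keep non-compound output.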
            }

            return new OneMerge(infosToMerge, doCFS);
        }
 internal ReaderCommit(SegmentInfos infos, Directory dir)
 {
     segmentsFileName = infos.GetCurrentSegmentFileName();
     this.dir = dir;
     int size = infos.Count;
     files = new List<string>(size);
     files.Add(segmentsFileName);
     for (int i = 0; i < size; i++)
     {
         SegmentInfo info = infos.Info(i);
         if (info.dir == dir)
             SupportClass.CollectionsSupport.AddAll(info.Files(), files);
     }
     version = infos.GetVersion();
     generation = infos.GetGeneration();
     isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
 }
        internal virtual void Init(Directory directory, SegmentInfos segmentInfos, bool closeDirectory, bool readOnly)
        {
            this.directory = directory;
            this.segmentInfos = segmentInfos;
            this.closeDirectory = closeDirectory;
            this.readOnly = readOnly;

            if (!readOnly && segmentInfos != null)
            {
                // we assume that this segments_N was properly sync'd prior
                for (int i = 0; i < segmentInfos.Count; i++)
                {
                    SegmentInfo info = segmentInfos.Info(i);
                    IList<string> files = info.Files();
                    for (int j = 0; j < files.Count; j++)
                        synced[files[j]] = files[j];
                }
            }
        }
Example #49
0
		private void  SetRollbackSegmentInfos(SegmentInfos infos)
		{
			lock (this)
			{
				rollbackSegmentInfos = (SegmentInfos) infos.Clone();
				System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
				rollbackSegments = new System.Collections.Hashtable();
				int size = rollbackSegmentInfos.Count;
				for (int i = 0; i < size; i++)
					rollbackSegments[rollbackSegmentInfos.Info(i)] = (System.Int32) i;
			}
		}
Example #50
0
		private System.String SegString(SegmentInfos infos)
		{
			lock (this)
			{
				System.Text.StringBuilder buffer = new System.Text.StringBuilder();
				int count = infos.Count;
				for (int i = 0; i < count; i++)
				{
					if (i > 0)
					{
						buffer.Append(' ');
					}
					SegmentInfo info = infos.Info(i);
					buffer.Append(info.SegString(directory));
					if (info.dir != directory)
						buffer.Append("**");
				}
				return buffer.ToString();
			}
		}
Example #51
0
			/// <summary>Forcefully clear changes for the specified segments,
			/// and remove them from the pool.  This is called on a successful merge.
			/// </summary>
			internal virtual void  Clear(SegmentInfos infos)
			{
				lock (this)
				{
					if (infos == null)
					{
                        System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator();
						while (iter.MoveNext())
						{
							System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current;
							((SegmentReader) ent.Value).hasChanges = false;
						}
					}
					else
					{
						int numSegments = infos.Count;
						for (int i = 0; i < numSegments; i++)
						{
							SegmentInfo info = infos.Info(i);
							if (readerMap.Contains(info))
							{
								((SegmentReader) readerMap[info]).hasChanges = false;
							}
						}
					}
				}
			}
Example #52
0
 // Used by near real-time search
 internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
 {
     this.internalDirectory = writer.Directory;
     this.readOnly = true;
     segmentInfos = infos;
     segmentInfosStart = (SegmentInfos) infos.Clone();
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(infos.Files(internalDirectory, true));
     }
     
     // IndexWriter synchronizes externally before calling
     // us, which ensures infos will not change; so there's
     // no need to process segments in reverse order
     int numSegments = infos.Count;
     var readers = new SegmentReader[numSegments];
     Directory dir = writer.Directory;
     int upto = 0;
     
     for (int i = 0; i < numSegments; i++)
     {
         bool success = false;
         try
         {
             SegmentInfo info = infos.Info(i);
             if (info.dir == dir)
             {
                 readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
             }
             success = true;
         }
         finally
         {
             if (!success)
             {
                 // Close all readers we had opened:
                 for (upto--; upto >= 0; upto--)
                 {
                     try
                     {
                         readers[upto].Close();
                     }
                     catch (System.Exception)
                     {
                         // keep going - we want to clean up as much as possible
                     }
                 }
             }
         }
     }
     
     this.writer = writer;
     
     if (upto < readers.Length)
     {
         // This means some segments were in a foreign Directory
         var newReaders = new SegmentReader[upto];
         Array.Copy(readers, 0, newReaders, 0, upto);
         readers = newReaders;
     }
     
     Initialize(readers);
 }
Example #53
0
 /// <summary>Construct reading the named set of readers. </summary>
 internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
 {
     internalDirectory = directory;
     this.readOnly = readOnly;
     this.segmentInfos = sis;
     this.deletionPolicy = deletionPolicy;
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(sis.Files(directory, true));
     }
     
     // To reduce the chance of hitting FileNotFound
     // (and having to retry), we open segments in
     // reverse because IndexWriter merges & deletes
     // the newest segments first.
     
     var readers = new SegmentReader[sis.Count];
     for (int i = sis.Count - 1; i >= 0; i--)
     {
         bool success = false;
         try
         {
             readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
             success = true;
         }
         finally
         {
             if (!success)
             {
                 // Close all readers we had opened:
                 for (i++; i < sis.Count; i++)
                 {
                     try
                     {
                         readers[i].Close();
                     }
                     catch (System.Exception)
                     {
                         // keep going - we want to clean up as much as possible
                     }
                 }
             }
         }
     }
     
     Initialize(readers);
 }
Example #54
0
 /// <summary>Returns a <see cref="Status" /> instance detailing
 /// the state of the index.
 /// 
 /// <p/>As this method checks every byte in the specified
 /// segments, on a large index it can take quite a long
 /// time to run.
 /// 
 /// <p/><b>WARNING</b>: make sure
 /// you only call this when the index is not opened by any
 /// writer.
 /// </summary>
 /// <param name="onlySegments">list of specific segment names to check,
 /// or null to check all segments</param>
 public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
 {
     System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
     SegmentInfos sis = new SegmentInfos();
     Status result = new Status();
     result.dir = dir;
     try
     {
         sis.Read(dir);
     }
     catch (System.Exception t)
     {
         Msg("ERROR: could not read any segments file in directory");
         result.missingSegments = true;
         if (infoStream != null)
             infoStream.WriteLine(t.StackTrace);
         return result;
     }
     
     int numSegments = sis.Count;
     var segmentsFileName = sis.GetCurrentSegmentFileName();
     IndexInput input = null;
     try
     {
         input = dir.OpenInput(segmentsFileName);
     }
     catch (System.Exception t)
     {
         Msg("ERROR: could not open segments file in directory");
         if (infoStream != null)
             infoStream.WriteLine(t.StackTrace);
         result.cantOpenSegments = true;
         return result;
     }
     int format = 0;
     try
     {
         format = input.ReadInt();
     }
     catch (System.Exception t)
     {
         Msg("ERROR: could not read segment file version in directory");
         if (infoStream != null)
             infoStream.WriteLine(t.StackTrace);
         result.missingSegmentVersion = true;
         return result;
     }
     finally
     {
         if (input != null)
             input.Close();
     }
     
     System.String sFormat = "";
     bool skip = false;
     
     if (format == SegmentInfos.FORMAT)
         sFormat = "FORMAT [Lucene Pre-2.1]";
      else if (format == SegmentInfos.FORMAT_LOCKLESS)
         sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
     else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
         sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
     else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
         sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
     else
     {
         if (format == SegmentInfos.FORMAT_CHECKSUM)
             sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
         else if (format == SegmentInfos.FORMAT_DEL_COUNT)
             sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
         else if (format == SegmentInfos.FORMAT_HAS_PROX)
             sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
         else if (format == SegmentInfos.FORMAT_USER_DATA)
             sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
         else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
             sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
         else if (format < SegmentInfos.CURRENT_FORMAT)
         {
             sFormat = "int=" + format + " [newer version of Lucene than this tool]";
             skip = true;
         }
         else
         {
             sFormat = format + " [Lucene 1.3 or prior]";
         }
     }
     
     result.segmentsFileName = segmentsFileName;
     result.numSegments = numSegments;
     result.segmentFormat = sFormat;
     result.userData = sis.UserData;
     System.String userDataString;
     if (sis.UserData.Count > 0)
     {
         userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
     }
     else
     {
         userDataString = "";
     }
     
     Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
     
     if (onlySegments != null)
     {
         result.partial = true;
         if (infoStream != null)
             infoStream.Write("\nChecking only these segments:");
         foreach(string s in onlySegments)
         {
             if (infoStream != null)
             {
                 infoStream.Write(" " + s);
             }
         }
         result.segmentsChecked.AddRange(onlySegments);
         Msg(":");
     }
     
     if (skip)
     {
         Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
         result.toolOutOfDate = true;
         return result;
     }
     
     
     result.newSegments = (SegmentInfos) sis.Clone();
     result.newSegments.Clear();
     
     for (int i = 0; i < numSegments; i++)
     {
         SegmentInfo info = sis.Info(i);
         if (onlySegments != null && !onlySegments.Contains(info.name))
             continue;
         var segInfoStat = new Status.SegmentInfoStatus();
         result.segmentInfos.Add(segInfoStat);
         Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
         segInfoStat.name = info.name;
         segInfoStat.docCount = info.docCount;
         
         int toLoseDocCount = info.docCount;
         
         SegmentReader reader = null;
         
         try
         {
             Msg("    compound=" + info.GetUseCompoundFile());
             segInfoStat.compound = info.GetUseCompoundFile();
             Msg("    hasProx=" + info.HasProx);
             segInfoStat.hasProx = info.HasProx;
             Msg("    numFiles=" + info.Files().Count);
             segInfoStat.numFiles = info.Files().Count;
             Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
             segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
             IDictionary<string, string> diagnostics = info.Diagnostics;
             segInfoStat.diagnostics = diagnostics;
             if (diagnostics.Count > 0)
             {
                 Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
             }
             
             int docStoreOffset = info.DocStoreOffset;
             if (docStoreOffset != - 1)
             {
                 Msg("    docStoreOffset=" + docStoreOffset);
                 segInfoStat.docStoreOffset = docStoreOffset;
                 Msg("    docStoreSegment=" + info.DocStoreSegment);
                 segInfoStat.docStoreSegment = info.DocStoreSegment;
                 Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                 segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
             }
             System.String delFileName = info.GetDelFileName();
             if (delFileName == null)
             {
                 Msg("    no deletions");
                 segInfoStat.hasDeletions = false;
             }
             else
             {
                 Msg("    has deletions [delFileName=" + delFileName + "]");
                 segInfoStat.hasDeletions = true;
                 segInfoStat.deletionsFileName = delFileName;
             }
             if (infoStream != null)
                 infoStream.Write("    test: open reader.........");
             reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
             
             segInfoStat.openReaderPassed = true;
             
             int numDocs = reader.NumDocs();
             toLoseDocCount = numDocs;
             if (reader.HasDeletions)
             {
                 if (reader.deletedDocs.Count() != info.GetDelCount())
                 {
                     throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                 }
                 if (reader.deletedDocs.Count() > reader.MaxDoc)
                 {
                     throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                 }
                 if (info.docCount - numDocs != info.GetDelCount())
                 {
                     throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                 }
                 segInfoStat.numDeleted = info.docCount - numDocs;
                 Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
             }
             else
             {
                 if (info.GetDelCount() != 0)
                 {
                     throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                 }
                 Msg("OK");
             }
             if (reader.MaxDoc != info.docCount)
                 throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
             
             // Test getFieldNames()
             if (infoStream != null)
             {
                 infoStream.Write("    test: fields..............");
             }
             ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
             Msg("OK [" + fieldNames.Count + " fields]");
             segInfoStat.numFields = fieldNames.Count;
             
             // Test Field Norms
             segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
             
             // Test the Term Index
             segInfoStat.termIndexStatus = TestTermIndex(info, reader);
             
             // Test Stored Fields
             segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
             
             // Test Term Vectors
             segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
             
             // Rethrow the first exception we encountered
             //  This will cause stats for failed segments to be incremented properly
             if (segInfoStat.fieldNormStatus.error != null)
             {
                 throw new SystemException("Field Norm test failed");
             }
             else if (segInfoStat.termIndexStatus.error != null)
             {
                 throw new SystemException("Term Index test failed");
             }
             else if (segInfoStat.storedFieldStatus.error != null)
             {
                 throw new SystemException("Stored Field test failed");
             }
             else if (segInfoStat.termVectorStatus.error != null)
             {
                 throw new System.SystemException("Term Vector test failed");
             }
             
             Msg("");
         }
         catch (System.Exception t)
         {
             Msg("FAILED");
             const string comment = "fixIndex() would remove reference to this segment";
             Msg("    WARNING: " + comment + "; full exception:");
             if (infoStream != null)
                 infoStream.WriteLine(t.StackTrace);
             Msg("");
             result.totLoseDocCount += toLoseDocCount;
             result.numBadSegments++;
             continue;
         }
         finally
         {
             if (reader != null)
                 reader.Close();
         }
         
         // Keeper
         result.newSegments.Add((SegmentInfo)info.Clone());
     }
     
     if (0 == result.numBadSegments)
     {
         result.clean = true;
         Msg("No problems were detected with this index.\n");
     }
     else
         Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
     
     return result;
 }
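A caller sketch for the 2.9-style checker above (a sketch only: the directory path is hypothetical, passing null for onlySegments checks every segment, and the status fields are the same ones the snippet assigns):

     // needs: using Lucene.Net.Index; using Lucene.Net.Store;
     Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
     CheckIndex checker = new CheckIndex(dir);
     CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null); // null = check all segments
     if (status.clean)
         System.Console.WriteLine("no problems detected");
     else
         System.Console.WriteLine(status.numBadSegments + " broken segments, " + status.totLoseDocCount + " docs would be lost");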
Example #55
0
		/// <summary>Returns the merges necessary to optimize the index.
		/// This merge policy defines "optimized" to mean only one
		/// segment in the index, where that segment has no
		/// deletions pending nor separate norms, and it is in
		/// compound file format if the current useCompoundFile
		/// setting is true.  This method returns multiple merges
		/// (mergeFactor at a time) so the {@link MergeScheduler}
		/// in use may make use of concurrency. 
		/// </summary>
		public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
		{
			MergeSpecification spec;
			
			System.Diagnostics.Debug.Assert(maxNumSegments > 0);
			
			if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
			{
				
				// Find the newest (rightmost) segment that needs to
				// be optimized (other segments may have been flushed
				// since optimize started):
				int last = infos.Count;
				while (last > 0)
				{
					SegmentInfo info = infos.Info(--last);
					if (segmentsToOptimize.Contains(info))
					{
						last++;
						break;
					}
				}
				
				if (last > 0)
				{
					
					spec = new MergeSpecification();
					
					// First, enroll all "full" merges (size
					// mergeFactor) to potentially be run concurrently:
					while (last - maxNumSegments + 1 >= mergeFactor)
					{
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
						last -= mergeFactor;
					}
					
					// Only if there are no full merges pending do we
					// add a final partial (< mergeFactor segments) merge:
					if (0 == spec.merges.Count)
					{
						if (maxNumSegments == 1)
						{
							
							// Since we must optimize down to 1 segment, the
							// choice is simple:
							if (last > 1 || !IsOptimized(infos.Info(0)))
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
						}
						else if (last > maxNumSegments)
						{
							
							// Take care to pick a partial merge that is
							// least cost, but does not make the index too
							// lopsided.  If we always just picked the
							// partial tail then we could produce a highly
							// lopsided index over time:
							
							// We must merge this many segments to leave
							// maxNumSegments in the index (from when
							// optimize was first kicked off):
							int finalMergeSize = last - maxNumSegments + 1;
							
							// Consider all possible starting points:
							long bestSize = 0;
							int bestStart = 0;
							
							for (int i = 0; i < last - finalMergeSize + 1; i++)
							{
								long sumSize = 0;
								for (int j = 0; j < finalMergeSize; j++)
									sumSize += Size(infos.Info(j + i));
								if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
								{
									bestStart = i;
									bestSize = sumSize;
								}
							}

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
						}
					}
				}
				else
					spec = null;
			}
			else
				spec = null;
			
			return spec;
		}
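This policy is normally exercised through the writer rather than called directly; a hedged sketch (assumes 'writer' is an already-open IndexWriter configured with this merge policy):

			// Optimize(N) asks the policy, via FindMergesForOptimize, to merge down to at most N segments.
			writer.Optimize(1);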
Example #56
0
		private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
		{
			int numSegments = infos.Count;
			int numToOptimize = 0;
			SegmentInfo optimizeInfo = null;
			for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				if (segmentsToOptimize.Contains(info))
				{
					numToOptimize++;
					optimizeInfo = info;
				}
			}
			
			return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
		}
Example #57
0
		/// <summary> Finds merges necessary to expunge all deletes from the
		/// index.  We simply merge adjacent segments that have
		/// deletes, up to mergeFactor at a time.
		/// </summary>
		public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
		{
			int numSegments = segmentInfos.Count;
			
			if (Verbose())
				Message("findMergesToExpungeDeletes: " + numSegments + " segments");
			
			MergeSpecification spec = new MergeSpecification();
			int firstSegmentWithDeletions = - 1;
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = segmentInfos.Info(i);
				int delCount = writer.NumDeletedDocs(info);
				if (delCount > 0)
				{
					if (Verbose())
						Message("  segment " + info.name + " has deletions");
					if (firstSegmentWithDeletions == - 1)
						firstSegmentWithDeletions = i;
					else if (i - firstSegmentWithDeletions == mergeFactor)
					{
						// We've seen mergeFactor segments in a row with
						// deletions, so force a merge now:
						if (Verbose())
							Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
						firstSegmentWithDeletions = i;
					}
				}
				else if (firstSegmentWithDeletions != - 1)
				{
					// End of a sequence of segments with deletions, so,
					// merge those past segments even if it's fewer than
					// mergeFactor segments
					if (Verbose())
						Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
					firstSegmentWithDeletions = - 1;
				}
			}
			
			if (firstSegmentWithDeletions != - 1)
			{
				if (Verbose())
					Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
			}
			
			return spec;
		}
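As with optimize, this is driven through the writer; a hedged sketch (assumes 'writer' is an already-open IndexWriter using this policy; the field and value in the delete are hypothetical):

			writer.DeleteDocuments(new Term("id", "42")); // buffer a delete
			writer.ExpungeDeletes();                      // the writer consults FindMergesToExpungeDeletes and runs the returned merges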
Example #58
0
		public virtual void  AddIndexes(Directory[] dirs)
		{
			
			EnsureOpen();
			
			NoDupDirs(dirs);
			
			// Do not allow add docs or deletes while we are running:
			docWriter.PauseAllThreads();
			
			try
			{
				
				if (infoStream != null)
					Message("flush at addIndexes");
				Flush(true, false, true);
				
				bool success = false;
				
				StartTransaction(false);
				
				try
				{
					
					int docCount = 0;
					lock (this)
					{
						EnsureOpen();
						for (int i = 0; i < dirs.Length; i++)
						{
							SegmentInfos sis = new SegmentInfos(); // read infos from dir
							sis.Read(dirs[i]);
							for (int j = 0; j < sis.Count; j++)
							{
								SegmentInfo info = sis.Info(j);
								docCount += info.docCount;
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info));
								segmentInfos.Add(info); // add each info
							}
						}
					}
					
					// Notify DocumentsWriter that the flushed count just increased
					docWriter.UpdateFlushedDocCount(docCount);
					
					Optimize();
					
					success = true;
				}
				finally
				{
					if (success)
					{
						CommitTransaction();
					}
					else
					{
						RollbackTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexes(Directory[])");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
        internal bool ApplyDeletes(SegmentInfos infos)
        {
            lock (this)
            {
                if (!HasDeletes())
                    return false;

                if (infoStream != null)
                    Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " +
                            deletesFlushed.docIDs.Count + " deleted docIDs and " +
                            deletesFlushed.queries.Count + " deleted queries on " +
                            infos.Count + " segments.");

                int infosEnd = infos.Count;

                int docStart = 0;
                bool any = false;
                for (int i = 0; i < infosEnd; i++)
                {
                    IndexReader reader = SegmentReader.Get(infos.Info(i), false);
                    bool success = false;
                    try
                    {
                        any |= ApplyDeletes(reader, docStart);
                        docStart += reader.MaxDoc();
                        success = true;
                    }
                    finally
                    {
                        if (reader != null)
                        {
                            try
                            {
                                if (success)
                                    reader.DoCommit();
                            }
                            finally
                            {
                                reader.DoClose();
                            }
                        }
                    }
                }

                deletesFlushed.Clear();

                return any;
            }
        }
Example #60
0
		/// <summary>Checks if any merges are now necessary and returns a
		/// {@link MergePolicy.MergeSpecification} if so.  A merge
		/// is necessary when there are more than {@link
		/// #setMergeFactor} segments at a given level.  When
		/// multiple levels have too many segments, this method
		/// will return multiple merges, allowing the {@link
		/// MergeScheduler} to use concurrency. 
		/// </summary>
		public override MergeSpecification FindMerges(SegmentInfos infos)
		{
			
			int numSegments = infos.Count;
			if (Verbose())
				Message("findMerges: " + numSegments + " segments");
			
			// Compute levels, which is just log (base mergeFactor)
			// of the size of each segment
			float[] levels = new float[numSegments];
			float norm = (float) System.Math.Log(mergeFactor);
			
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				long size = Size(info);
				
				// Floor tiny segments
				if (size < 1)
					size = 1;
				levels[i] = (float) System.Math.Log(size) / norm;
			}
			
			float levelFloor;
			if (minMergeSize <= 0)
				levelFloor = (float) 0.0;
			else
			{
				levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
			}
			
			// Now, we quantize the log values into levels.  The
			// first level is any segment whose log size is within
			// LEVEL_LOG_SPAN of the max size, or which has such a
			// segment "to the right".  Then, we find the max of all
			// other segments and use that to define the next level
			// segment, etc.
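			// Worked example with hypothetical sizes: for mergeFactor = 10 the level
			// is roughly log10(size).  Segments of 10 MB, 1 MB, 1 MB and 100 KB get
			// levels of about 7.0, 6.0, 6.0 and 5.0; with LEVEL_LOG_SPAN = 0.75 the
			// top level spans [6.25, 7.0], so only the 10 MB segment falls into it,
			// the two 1 MB segments form the next level, and so on.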
			
			MergeSpecification spec = null;
			
			int start = 0;
			while (start < numSegments)
			{
				
				// Find max level of all segments not already
				// quantized.
				float maxLevel = levels[start];
				for (int i = 1 + start; i < numSegments; i++)
				{
					float level = levels[i];
					if (level > maxLevel)
						maxLevel = level;
				}
				
				// Now search backwards for the rightmost segment that
				// falls into this level:
				float levelBottom;
				if (maxLevel < levelFloor)
				// All remaining segments fall into the min level
					levelBottom = - 1.0F;
				else
				{
					levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
					
					// Force a boundary at the level floor
					if (levelBottom < levelFloor && maxLevel >= levelFloor)
						levelBottom = levelFloor;
				}
				
				int upto = numSegments - 1;
				while (upto >= start)
				{
					if (levels[upto] >= levelBottom)
					{
						break;
					}
					upto--;
				}
				if (Verbose())
					Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
				
				// Finally, record all merges that are viable at this level:
				int end = start + mergeFactor;
				while (end <= 1 + upto)
				{
					bool anyTooLarge = false;
					for (int i = start; i < end; i++)
					{
						SegmentInfo info = infos.Info(i);
						anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
					}
					
					if (!anyTooLarge)
					{
						if (spec == null)
							spec = new MergeSpecification();
						if (Verbose())
							Message("    " + start + " to " + end + ": add this merge");
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
					}
					else if (Verbose())
						Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
					
					start = end;
					end = start + mergeFactor;
				}
				
				start = 1 + upto;
			}
			
			return spec;
		}