/// <summary> Returns the size of <paramref name="info" /> as measured by the
/// concrete subclass; this declaration is abstract, so it does not fix the unit.
/// </summary>
abstract protected internal long Size(SegmentInfo info);
Exemple #2
			/// <summary> Obtain a SegmentReader from the readerPool.  The reader
			/// must be returned by calling <see cref="Release(SegmentReader)" />.
			/// This overload holds the lock on this instance and forwards to the
			/// four-argument <c>Get</c>, supplying <c>BufferedIndexInput.BUFFER_SIZE</c>
			/// and the enclosing instance's <c>readerTermsIndexDivisor</c> as the
			/// two extra arguments.
			/// </summary>
			/// <seealso cref="Release(SegmentReader)">
			/// </seealso>
			/// <param name="info">segment to obtain a reader for
			/// </param>
			/// <param name="doOpenStores">passed through unchanged to the four-argument overload
			/// </param>
			/// <exception cref="IOException">carried over from the original documentation of this method</exception>
			public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores)
				lock (this)
                    return Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, enclosingInstance.readerTermsIndexDivisor);
Exemple #3
			/// <summary> Reads <c>readerMap[info]</c> while holding the lock on this
			/// instance and returns the value read.
			/// </summary>
			// Returns a ref
			public virtual SegmentReader GetIfExists(SegmentInfo info)
				lock (this)
					SegmentReader sr = readerMap[info];
					// NOTE(review): the statement guarded by this null check is not
					// visible in this excerpt; given "Returns a ref" above it presumably
					// takes a reference on sr before returning -- confirm against the
					// complete source.
					if (sr != null)
					return sr;
Exemple #4
			/// <summary> While holding the lock on this instance, asks the enclosing
			/// instance's <c>segmentInfos</c> for the index of <paramref name="info" />
			/// and returns the element stored at that index; when <c>IndexOf</c>
			/// reports -1 the argument itself is returned unchanged.
			/// </summary>
			/// <param name="info">segment to look up</param>
			/// <returns>the matching element of <c>segmentInfos</c>, or <paramref name="info" /></returns>
			public virtual SegmentInfo MapToLive(SegmentInfo info)
			{
				lock (this)
				{
					int position = Enclosing_Instance.segmentInfos.IndexOf(info);
					if (position == -1)
					{
						// Nothing matched: hand the caller's instance back.
						return info;
					}
					return Enclosing_Instance.segmentInfos[position];
				}
			}
Exemple #5
        /// <summary> Builds the diagnostics map for a segment and assigns it to
        /// <c>info.Diagnostics</c>.  The map always carries the "source" entry plus
        /// the version, OS and runtime values read from <c>Constants</c>; entries
        /// from <paramref name="details" /> are copied in last, so they overwrite
        /// any built-in entry that uses the same key.
        /// </summary>
        /// <param name="info">segment whose <c>Diagnostics</c> property is replaced</param>
        /// <param name="source">value stored under the "source" key</param>
        /// <param name="details">optional extra entries; may be null, in which case only the built-in entries are stored</param>
        private void SetDiagnostics(SegmentInfo info, System.String source, IDictionary<string, string> details)
        {
            IDictionary<string, string> diagnostics = new Dictionary<string, string>();
            diagnostics["source"] = source;
            diagnostics["lucene.version"] = Constants.LUCENE_VERSION;
            // The + "" concatenations are kept from the original; presumably they
            // normalise a null constant to an empty string -- confirm before removing.
            diagnostics["os"] = Constants.OS_NAME + "";
            diagnostics["os.arch"] = Constants.OS_ARCH + "";
            diagnostics["os.version"] = Constants.OS_VERSION + "";
            diagnostics["java.version"] = Constants.JAVA_VERSION + "";
            diagnostics["java.vendor"] = Constants.JAVA_VENDOR + "";
            if (details != null)
            {
                // Enumerate key/value pairs directly instead of walking Keys and
                // looking every key up a second time through the indexer.
                foreach (KeyValuePair<string, string> detail in details)
                {
                    diagnostics[detail.Key] = detail.Value;
                }
            }
            info.Diagnostics = diagnostics;
        }
Exemple #6
		/// <summary> Returns true if a newly flushed (not from merge)
		/// segment should use the compound file format.
		/// </summary>
		/// <param name="segments">segment collection supplied by the caller; DoFlushInternal
		/// passes the writer's <c>segmentInfos</c></param>
		/// <param name="newSegment">the newly flushed segment the decision is made for</param>
		public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
Exemple #7
		// Flushes the documents buffered in docWriter into a new SegmentInfo while
		// holding the lock on this instance.  Returns flushDocs (true when
		// docWriter.NumDocsInRAM was greater than zero), or false when
		// docWriter.PauseAllThreads() returns true or when an OutOfMemoryException
		// was handed to HandleOOM.  Throws System.SystemException if hitOOM is set.
		//
		// NOTE(review): this listing shows no braces and several statements seem
		// to be absent (the catch near the end has no visible try, and some "if"
		// lines have no visible body, including the handling of flushDeletes).
		// Read the nesting from the indentation and confirm details against the
		// complete source.
		//
		// TODO: this method should not have to be entirely
		// synchronized, ie, merges should be allowed to commit
		// even while a flush is happening
		private bool DoFlushInternal(bool flushDocStores, bool flushDeletes)
			lock (this)
				if (hitOOM)
					throw new System.SystemException("this writer hit an OutOfMemoryError; cannot flush");

				// If we are flushing because too many deletes
				// accumulated, then we should apply the deletes to free
				// RAM:
				flushDeletes |= docWriter.DoApplyDeletes();
				// Make sure no threads are actively adding a document.
				// Returns true if docWriter is currently aborting, in
				// which case we skip flushing this segment
                if (infoStream != null)
                    Message("flush: now pause all indexing threads");
				if (docWriter.PauseAllThreads())
					return false;
					SegmentInfo newSegment = null;
					int numDocs = docWriter.NumDocsInRAM;
					// Always flush docs if there are any
					bool flushDocs = numDocs > 0;
					System.String docStoreSegment = docWriter.DocStoreSegment;

                    System.Diagnostics.Debug.Assert(docStoreSegment != null || numDocs == 0, "dss=" + docStoreSegment + " numDocs=" + numDocs);
					// Without a doc store segment there is nothing to flush for the doc stores.
					if (docStoreSegment == null)
						flushDocStores = false;
					int docStoreOffset = docWriter.DocStoreOffset;
					bool docStoreIsCompoundFile = false;
					if (infoStream != null)
						Message("  flush: segment=" + docWriter.Segment + " docStoreSegment=" + docWriter.DocStoreSegment + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms());
						Message("  index before flush " + SegString());
					// Check if the doc stores must be separately flushed
					// because other segments, besides the one we are about
					// to flush, reference it
					if (flushDocStores && (!flushDocs || !docWriter.Segment.Equals(docWriter.DocStoreSegment)))
						// We must separately flush the doc store
						if (infoStream != null)
							Message("  flush shared docStore segment " + docStoreSegment);
						docStoreIsCompoundFile = FlushDocStores();
						// Already flushed above, so the segment flush below must not do it again.
						flushDocStores = false;
					System.String segment = docWriter.Segment;
					// If we are flushing docs, segment must not be null:
					System.Diagnostics.Debug.Assert(segment != null || !flushDocs);
					if (flushDocs)
						bool success = false;
						int flushedDocCount;
							flushedDocCount = docWriter.Flush(flushDocStores);
                            if (infoStream != null)
                                Message("flushedFiles=" + docWriter.GetFlushedFiles());
							success = true;
							// NOTE(review): in this listing the failure check directly follows
							// "success = true"; presumably it sat in a finally block that is
							// not visible here -- confirm.
							if (!success)
								if (infoStream != null)
									Message("hit exception flushing segment " + segment);
						if (0 == docStoreOffset && flushDocStores)
							// This means we are flushing private doc stores
							// with this segment, so it will not be shared
							// with other segments
							System.Diagnostics.Debug.Assert(docStoreSegment != null);
							docStoreOffset = - 1;
							docStoreIsCompoundFile = false;
							docStoreSegment = null;
						// Create new SegmentInfo, but do not add to our
						// segmentInfos until deletes are flushed
						// successfully.
						newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx());
						SetDiagnostics(newSegment, "flush");
					// NOTE(review): the body of the next "if (flushDocs)" is not visible in this excerpt.
					if (flushDocs)
					if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment))
						// Now build compound file
						bool success = false;
							success = true;
							// NOTE(review): the statements that build the compound file are not
							// visible here; the lines below look like the failure clean-up path.
							if (!success)
								if (infoStream != null)
									Message("hit exception creating compound file for newly flushed segment " + segment);
								deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
					// NOTE(review): the bodies of the next two "if" lines are not visible in this excerpt.
					if (flushDeletes)
					if (flushDocs)
					return flushDocs;
				// NOTE(review): the try that this catch belongs to is not visible in this excerpt.
				catch (System.OutOfMemoryException oom)
					HandleOOM(oom, "doFlush");
					// never hit
					return false;
		/// <summary> Returns the <c>useCompoundFile</c> field; neither argument is
		/// consulted by this override.
		/// </summary>
		public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
			return useCompoundFile;
	    /// <summary> Returns the size of <paramref name="info" /> as measured by the
	    /// concrete subclass; this declaration is abstract, so it does not fix the unit.
	    /// </summary>
	    abstract protected internal long Size(SegmentInfo info);
Exemple #10
        /// <summary>Returns a <see cref="Status" /> instance detailing
        /// the state of the index.
        /// </summary>
        /// <remarks>
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// <p/>NOTE(review): this listing shows no braces and appears to have lost
        /// several statements (every catch lacks a visible try, some "if" lines
        /// have no visible body, and no return statement is visible).  Read the
        /// nesting from the indentation and confirm details against the complete
        /// source.
        /// </remarks>
        /// <param name="onlySegments">list of specific segment names to check;
        /// when non-null, <c>result.partial</c> is set and segments whose name
        /// is not in the list are filtered by the per-segment loop
        /// </param>
        public virtual Status CheckIndex_Renamed_Method(List <string> onlySegments)
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            // NOTE(review): the try for this catch is not visible; presumably it
            // loaded the segments file into sis -- confirm.
            catch (System.Exception t)
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)

            int        numSegments      = sis.Count;
            var        segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput input            = null;

                input = dir.OpenInput(segmentsFileName);
            catch (System.Exception t)
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                result.cantOpenSegments = true;
            int format = 0;

                // The first int read from the segments file is treated as its format.
                format = input.ReadInt();
            catch (System.Exception t)
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                result.missingSegmentVersion = true;
                if (input != null)

            System.String sFormat = "";
            bool          skip    = false;

            // Translate the numeric format into a human-readable label.
            // NOTE(review): some "else" keywords seem to be absent from this chain
            // (for example before the more deeply indented group and before the
            // final "Lucene 1.3 or prior" assignment) -- confirm.
            if (format == SegmentInfos.FORMAT)
                sFormat = "FORMAT [Lucene Pre-2.1]";
            if (format == SegmentInfos.FORMAT_LOCKLESS)
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                else if (format < SegmentInfos.CURRENT_FORMAT)
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                    sFormat = format + " [Lucene 1.3 or prior]";

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.UserData;
            System.String userDataString;
            if (sis.UserData.Count > 0)
                userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
                userDataString = "";

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
                result.partial = true;
                if (infoStream != null)
                    infoStream.Write("\nChecking only these segments:");
                foreach (string s in onlySegments)
                    if (infoStream != null)
                        infoStream.Write(" " + s);

            // skip is only set when the format is newer than this tool understands.
            if (skip)
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;

            result.newSegments = (SegmentInfos)sis.Clone();

            // Examine every segment listed in the segments file.
            for (int i = 0; i < numSegments; i++)
                SegmentInfo info = sis.Info(i);
                // NOTE(review): the statement taken when a segment is filtered out is not visible here.
                if (onlySegments != null && !onlySegments.Contains(info.name))
                var segInfoStat = new Status.SegmentInfoStatus();
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                // Added to result.totLoseDocCount by the catch at the end of this loop body.
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.HasProx);
                    segInfoStat.hasProx = info.HasProx;
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    IDictionary <string, string> diagnostics = info.Diagnostics;
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                        Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));

                    int docStoreOffset = info.DocStoreOffset;
                    if (docStoreOffset != -1)
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.DocStoreSegment);
                        segInfoStat.docStoreSegment = info.DocStoreSegment;
                        Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                        segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
                    System.String delFileName = info.GetDelFileName();
                    // NOTE(review): the "else" separating the two outcomes below is not visible.
                    if (delFileName == null)
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    if (infoStream != null)
                        infoStream.Write("    test: open reader.........");
                    reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    // The reader opened, so from here on the loss estimate is reader.NumDocs().
                    toLoseDocCount = numDocs;
                    // Cross-check the deletion counts reported by the reader and by the SegmentInfo.
                    if (reader.HasDeletions)
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        if (reader.deletedDocs.Count() > reader.MaxDoc)
                            throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        if (info.docCount - numDocs != info.GetDelCount())
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                        if (info.GetDelCount() != 0)
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                    if (reader.MaxDoc != info.docCount)
                        throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);

                    // Test getFieldNames()
                    if (infoStream != null)
                        infoStream.Write("    test: fields..............");
                    ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                        throw new SystemException("Field Norm test failed");
                    else if (segInfoStat.termIndexStatus.error != null)
                        throw new SystemException("Term Index test failed");
                    else if (segInfoStat.storedFieldStatus.error != null)
                        throw new SystemException("Stored Field test failed");
                    else if (segInfoStat.termVectorStatus.error != null)
                        throw new System.SystemException("Term Vector test failed");

                // NOTE(review): the try for this catch is not visible, and no visible
                // statement increments result.numBadSegments, which is tested below.
                catch (System.Exception t)
                    const string comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    result.totLoseDocCount += toLoseDocCount;
                    if (reader != null)

                // Keeper

            // NOTE(review): the "else" before the WARNING message is not visible in this excerpt.
            if (0 == result.numBadSegments)
                result.clean = true;
                Msg("No problems were detected with this index.\n");
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

Exemple #11
        /// <summary> Test the term index.  Walks every term exposed by
        /// <c>reader.Terms()</c>, validates the document ids, frequencies and
        /// positions reported by <c>reader.TermPositions()</c>, and accumulates
        /// totals in a <see cref="Status.TermIndexStatus" />.  A failure is
        /// reported through <c>Msg</c> and stored in <c>status.error</c>.
        /// </summary>
        /// <remarks> NOTE(review): this listing shows no braces and appears to have
        /// lost statements: the catch has no visible try, no return is visible, and
        /// nothing visible updates <c>freq0</c> or <c>status.termCount</c> or
        /// positions <c>termPositions</c> / <c>myTermDocs</c> on the current term.
        /// Confirm against the complete source.
        /// </remarks>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
            var status = new Status.TermIndexStatus();

                if (infoStream != null)
                    infoStream.Write("    test: terms, freq, prox...");

                TermEnum      termEnum      = reader.Terms();
                TermPositions termPositions = reader.TermPositions();

                // Used only to count up # deleted docs for this term
                var myTermDocs = new MySegmentTermDocs(reader);

                int maxDoc = reader.MaxDoc;

                while (termEnum.Next())
                    Term term    = termEnum.Term;
                    int  docFreq = termEnum.DocFreq();
                    int lastDoc = -1;
                    int freq0   = 0;
                    status.totFreq += docFreq;
                    while (termPositions.Next())
                        int doc  = termPositions.Doc;
                        int freq = termPositions.Freq;
                        // Document ids must be strictly increasing and below maxDoc.
                        if (doc <= lastDoc)
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        if (doc >= maxDoc)
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);

                        lastDoc = doc;
                        if (freq <= 0)
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");

                        int lastPos = -1;
                        status.totPos += freq;
                        // Positions may repeat but must never decrease, and must be at least -1.
                        for (int j = 0; j < freq; j++)
                            int pos = termPositions.NextPosition();
                            if (pos < -1)
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            if (pos < lastPos)
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            lastPos = pos;

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    // NOTE(review): the loop body and the "else" before "delCount = 0" are not visible here.
                    if (reader.HasDeletions)
                        while (myTermDocs.Next())
                        delCount = myTermDocs.delCount;
                        delCount = 0;

                    // Docs seen plus docs deleted must account for the term's docFreq.
                    if (freq0 + delCount != docFreq)
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            catch (System.Exception e)
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)

        /// <summary>Checks if any merges are now necessary and returns a
        /// <see cref="MergePolicy.MergeSpecification" /> if so.  A merge
        /// is necessary when there are more than <see cref="MergeFactor" />
        /// segments at a given level.  When
        /// multiple levels have too many segments, this method
        /// will return multiple merges, allowing the <see cref="MergeScheduler" />
        /// to use concurrency.
        /// </summary>
        /// <remarks> NOTE(review): this listing shows no braces and appears to have
        /// lost statements (several "else" keywords, the exit of the backwards
        /// search loop, and the final return).  Confirm against the complete source.
        /// </remarks>
        /// <param name="infos">segments to examine, read by index through <c>Info(i)</c></param>
        public override MergeSpecification FindMerges(SegmentInfos infos)
            int numSegments = infos.Count;

            if (Verbose())
                Message("findMerges: " + numSegments + " segments");

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            float[] levels = new float[numSegments];
            float   norm   = (float)System.Math.Log(mergeFactor);

            for (int i = 0; i < numSegments; i++)
                SegmentInfo info = infos.Info(i);
                long        size = Size(info);

                // Floor tiny segments
                if (size < 1)
                    size = 1;
                levels[i] = (float)System.Math.Log(size) / norm;

            float levelFloor;

            // NOTE(review): the "else" between the two assignments below is not visible.
            if (minMergeSize <= 0)
                levelFloor = (float)0.0;
                levelFloor = (float)(System.Math.Log(minMergeSize) / norm);

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
            // LEVEL_LOG_SPAN of the max size, or, who has such as
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int start = 0;

            while (start < numSegments)
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start];
                for (int i = 1 + start; i < numSegments; i++)
                    float level = levels[i];
                    if (level > maxLevel)
                        maxLevel = level;

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                // NOTE(review): the "else" before the LEVEL_LOG_SPAN assignment is not visible.
                if (maxLevel < levelFloor)
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                        levelBottom = levelFloor;

                int upto = numSegments - 1;
                // NOTE(review): the loop exit and the decrement of upto are not visible here.
                while (upto >= start)
                    if (levels[upto] >= levelBottom)
                if (Verbose())
                    Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");

                // Finally, record all merges that are viable at this level:
                int end = start + mergeFactor;
                while (end <= 1 + upto)
                    bool anyTooLarge = false;
                    for (int i = start; i < end; i++)
                        SegmentInfo info = infos.Info(i);
                        anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);

                    if (!anyTooLarge)
                        // The specification is created lazily, on the first viable merge.
                        if (spec == null)
                            spec = new MergeSpecification();
                        if (Verbose())
                            Message("    " + start + " to " + end + ": add this merge");
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
                    else if (Verbose())
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");

                    start = end;
                    end   = start + mergeFactor;

                // Continue with the segments to the right of this level.
                start = 1 + upto;

        /// <summary> Finds merges necessary to expunge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        /// <remarks> NOTE(review): this listing shows no braces and no return
        /// statement; presumably <c>spec</c> is returned -- confirm against the
        /// complete source.
        /// </remarks>
        /// <param name="segmentInfos">segments to examine; each one's delete count is
        /// obtained from <c>writer.NumDeletedDocs</c></param>
        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
            int numSegments = segmentInfos.Count;

            if (Verbose())
                Message("findMergesToExpungeDeletes: " + numSegments + " segments");

            MergeSpecification spec       = new MergeSpecification();
            // Index of the first segment in the current run of segments with deletions; -1 when no run is open.
            int firstSegmentWithDeletions = -1;

            for (int i = 0; i < numSegments; i++)
                SegmentInfo info     = segmentInfos.Info(i);
                int         delCount = writer.NumDeletedDocs(info);
                if (delCount > 0)
                    if (Verbose())
                        Message("  segment " + info.name + " has deletions");
                    if (firstSegmentWithDeletions == -1)
                        firstSegmentWithDeletions = i;
                    else if (i - firstSegmentWithDeletions == mergeFactor)
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        if (Verbose())
                            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                        // Segment i itself has deletions, so it opens the next run.
                        firstSegmentWithDeletions = i;
                else if (firstSegmentWithDeletions != -1)
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    if (Verbose())
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                    firstSegmentWithDeletions = -1;

            // A run still open after the last segment is merged up to the end of the list.
            if (firstSegmentWithDeletions != -1)
                if (Verbose())
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));

        /// <summary>Returns the merges necessary to optimize the index.
        /// This merge policy defines "optimized" to mean only one
        /// segment in the index, where that segment has no
        /// deletions pending nor separate norms, and it is in
        /// compound file format if the current useCompoundFile
        /// setting is true.  This method returns multiple merges
        /// (mergeFactor at a time) so the <see cref="MergeScheduler" />
        /// in use may make use of concurrency.
        /// </summary>
        /// <remarks> NOTE(review): this listing shows no braces and appears to have
        /// lost statements (the exit of the backwards search loop, the "else"
        /// keywords before the two <c>spec = null</c> assignments, and the final
        /// return).  Confirm against the complete source.
        /// </remarks>
        /// <param name="infos">segments currently in the index</param>
        /// <param name="maxNumSegments">target number of segments; asserted to be greater than zero</param>
        /// <param name="segmentsToOptimize">segments that take part in the optimize; used to
        /// locate the rightmost segment that still needs optimizing</param>
        public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet <SegmentInfo> segmentsToOptimize)
            MergeSpecification spec;

            System.Diagnostics.Debug.Assert(maxNumSegments > 0);

            if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
                // Find the newest (rightmost) segment that needs to
                // be optimized (other segments may have been flushed
                // since optimize started):
                int last = infos.Count;
                while (last > 0)
                    SegmentInfo info = infos.Info(--last);
                    // NOTE(review): the statements run on a match are not visible in this excerpt.
                    if (segmentsToOptimize.Contains(info))

                if (last > 0)
                    spec = new MergeSpecification();

                    // First, enroll all "full" merges (size
                    // mergeFactor) to potentially be run concurrently:
                    while (last - maxNumSegments + 1 >= mergeFactor)
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
                        last -= mergeFactor;

                    // Only if there are no full merges pending do we
                    // add a final partial (< mergeFactor segments) merge:
                    if (0 == spec.merges.Count)
                        if (maxNumSegments == 1)
                            // Since we must optimize down to 1 segment, the
                            // choice is simple:
                            if (last > 1 || !IsOptimized(infos.Info(0)))
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
                        else if (last > maxNumSegments)
                            // Take care to pick a partial merge that is
                            // least cost, but does not make the index too
                            // lopsided.  If we always just picked the
                            // partial tail then we could produce a highly
                            // lopsided index over time:

                            // We must merge this many segments to leave
                            // maxNumSegments in the index (from when
                            // optimize was first kicked off):
                            int finalMergeSize = last - maxNumSegments + 1;

                            // Consider all possible starting points:
                            long bestSize  = 0;
                            int  bestStart = 0;

                            for (int i = 0; i < last - finalMergeSize + 1; i++)
                                long sumSize = 0;
                                for (int j = 0; j < finalMergeSize; j++)
                                    sumSize += Size(infos.Info(j + i));
                                // The first window is always accepted; a later one must be smaller
                                // than the best so far and under twice the size of the segment to its left.
                                if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                                    bestStart = i;
                                    bestSize  = sumSize;

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
                    spec = null;
                spec = null;

Exemple #15
		/// <summary> Test term vectors for a segment.  For every document id below
		/// <c>info.docCount</c> that the reader does not report as deleted, the
		/// term frequency vectors are fetched and their count is added to
		/// <c>status.totVectors</c>.  A failure is reported through <c>Msg</c> and
		/// stored in <c>status.error</c>; the status object is returned either way.
		/// </summary>
		/// <remarks> NOTE(review): this listing shows no braces, the catch has no
		/// visible try, and no visible statement updates <c>status.docCount</c>,
		/// which the summary message divides by.  Confirm against the complete source.
		/// </remarks>
		/// <param name="info">segment whose <c>docCount</c> bounds the scan</param>
		/// <param name="reader">reader used to test deletions and fetch the vectors</param>
		/// <param name="format">number format applied to the summary message</param>
        private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
			var status = new Status.TermVectorStatus();
				if (infoStream != null)
					infoStream.Write("    test: term vectors........");
				for (int j = 0; j < info.docCount; ++j)
					if (!reader.IsDeleted(j))
						ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
						// A null result contributes nothing to the total.
						if (tfv != null)
							status.totVectors += tfv.Length;
                Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
			catch (System.Exception e)
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
			return status;
		/// <summary> Returns the document count of <paramref name="info" />.  When
		/// <c>internalCalibrateSizeByDeletes</c> is set, the count reported by
		/// <c>writer.NumDeletedDocs(info)</c> is subtracted from <c>info.docCount</c>.
		/// </summary>
		/// <remarks> NOTE(review): this listing shows no braces; the last return
		/// presumably belongs to an "else" branch that is not visible -- confirm.
		/// </remarks>
		protected internal virtual long SizeDocs(SegmentInfo info)
			if (internalCalibrateSizeByDeletes)
				int delCount = writer.NumDeletedDocs(info);
				// The cast widens to long before the subtraction.
				return (info.docCount - (long) delCount);
				return info.docCount;
		/// <summary>Flush all pending docs to a new segment </summary>
		/// <param name="closeDocStore">Selects the doc-store branch below (assert on
		/// <c>flushState.docStoreSegmentName</c>, reset of <c>flushState.numDocsInStore</c>).</param>
		/// <returns><c>flushState.numDocs</c>.</returns>
		internal int Flush(bool closeDocStore)
			lock (this)
				// NOTE(review): this listing has no braces and several statements appear to be
				// missing (empty `for`/`if` bodies, no try/finally around `success`); the comments
				// below describe only what is visible.

				// Preconditions checked in debug builds: there are buffered docs, ids are
				// consistent with the buffered count, and the wait queue is empty.
				System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
				System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
				System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				docStoreOffset = numDocsInStore;
				if (infoStream != null)
					Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
				bool success = false;
					if (closeDocStore)
						System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
						flushState.numDocsInStore = 0;
					// Set of per-thread consumers handed to consumer.Flush.
					// NOTE(review): the loop body that would populate it is not visible here.
					ICollection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
					for (int i = 0; i < threadStates.Length; i++)
					consumer.Flush(threads, flushState);
					if (infoStream != null)
						// Build a diagnostic line comparing RAM usage with the flushed segment size.
						// NOTE(review): no use of `message` is visible in this listing.
                        SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                        long newSegmentSize = si.SizeInBytes();
                        System.String message = System.String.Format(nf, "  oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
                            new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
					flushedDocCount += flushState.numDocs;
					success = true;
					// NOTE(review): the failure handling guarded by this check is not visible.
					if (!success)
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				return flushState.numDocs;
		/// <summary>
		/// Returns the byte size used to size a segment. When
		/// <c>internalCalibrateSizeByDeletes</c> is set and the segment has at
		/// least one document, the on-disk size is scaled by the fraction of
		/// documents that are not deleted; otherwise the raw
		/// <c>info.SizeInBytes()</c> value is returned.
		/// </summary>
		/// <param name="info">Segment to measure.</param>
		/// <returns>The (optionally delete-adjusted) size in bytes.</returns>
		protected internal virtual long SizeBytes(SegmentInfo info)
		{
			long rawBytes = info.SizeInBytes();
			if (!internalCalibrateSizeByDeletes)
			{
				return rawBytes;
			}

			int deletedDocs = writer.NumDeletedDocs(info);
			if (info.docCount <= 0)
			{
				// No documents: there is no ratio to apply.
				return rawBytes;
			}

			// Single-precision arithmetic is kept on purpose so results match exactly.
			float deletedFraction = (float) deletedDocs / (float) info.docCount;
			return (long) (rawBytes * (1.0f - deletedFraction));
		}
Exemple #19
		/// <summary>Merges the provided indexes into this index.
		/// <p/>After this completes, the index is optimized. <p/>
		/// <p/>The provided IndexReaders are not closed.<p/>
		/// <p/><b>NOTE:</b> while this is running, any attempts to
		/// add or delete documents (with another thread) will be
		/// paused until this method completes.
		/// <p/>See <see cref="AddIndexesNoOptimize(Directory[])" /> for
		/// details on transactional semantics, temporary free
		/// space required in the Directory, and non-CFS segments
		/// on an Exception.<p/>
		/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
		/// you should immediately close the writer.  See <a
		/// href="#OOME">above</a> for details.<p/>
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  AddIndexes(params IndexReader[] readers)
			// NOTE(review): this listing has no braces and many statements appear to be missing
			// (try/finally keywords, bodies of several `if`/`for`/`lock` statements). The added
			// comments describe only what is visible; confirm details against the full source.

			// Do not allow add docs or deletes while we are running:
			// We must pre-acquire a read lock here (and upgrade to
			// write lock in startTransaction below) so that no
			// other addIndexes is allowed to start up after we have
			// flushed & optimized but before we then start our
			// transaction.  This is because the merging below
			// requires that only one segment is present in the
			// index:
				SegmentInfo info = null;
				System.String mergedName = null;
				SegmentMerger merger = null;
				bool success = false;
					// Phase 1: flush and optimize so the index holds zero or one segment.
					Flush(true, false, true);
					Optimize(); // start with zero or 1 seg
					success = true;
					// Take care to release the read lock if we hit an
					// exception before starting the transaction
					if (!success)
				// true means we already have a read lock; if this
				// call hits an exception it will release the write
				// lock:
					// Phase 2: merge the existing segment (if any) and the supplied readers
					// into a newly named segment.
					mergedName = NewSegmentName();
					merger = new SegmentMerger(this, mergedName, null);
					SegmentReader sReader = null;
					lock (this)
						if (segmentInfos.Count == 1)
							// add existing index, if any
							sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, - 1);
					success = false;
						if (sReader != null)
						for (int i = 0; i < readers.Length; i++)
						// add new indexes
						int docCount = merger.Merge(); // merge 'em
						lock (this)
							segmentInfos.Clear(); // pop old infos & add new
							info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false, merger.HasProx());
							SetDiagnostics(info, "addIndexes(params IndexReader[])");
						// Notify DocumentsWriter that the flushed count just increased
						success = true;
						if (sReader != null)
					if (!success)
						if (infoStream != null)
							Message("hit exception in addIndexes during merge");
				// Phase 3: optionally build a compound file for the merged segment.
				if (mergePolicy is LogMergePolicy && UseCompoundFile)
					IList<string> files = null;
					lock (this)
						// Must incRef our files so that if another thread
						// is running merge/optimize, it doesn't delete our
						// segment's files before we have a chance to
						// finish making the compound file.
						if (segmentInfos.Contains(info))
							files = info.Files();
					// files stays null when the merged segment is no longer in segmentInfos.
					if (files != null)
						success = false;
							merger.CreateCompoundFile(mergedName + ".cfs");
							lock (this)
							success = true;
                            lock (this)
							if (!success)
								if (infoStream != null)
									Message("hit exception building compound file in addIndexes during merge");
			catch (System.OutOfMemoryException oom)
				// Out-of-memory is routed to the writer's OOM handler with this method's name.
				HandleOOM(oom, "addIndexes(params IndexReader[])");
				if (docWriter != null)
		/// <summary>
		/// Tells whether a single segment already counts as optimized: the
		/// writer reports no deleted docs for it, it has no separate norms, it
		/// lives in the writer's directory, and either its compound-file flag
		/// matches <c>useCompoundFile</c> or <c>internalNoCFSRatio</c> is below 1.0.
		/// </summary>
		/// <param name="info">Segment to inspect.</param>
		/// <returns><c>true</c> when all of the conditions above hold.</returns>
		private bool IsOptimized(SegmentInfo info)
		{
			// The checks run in the same order as a short-circuiting && chain.
			if (writer.NumDeletedDocs(info) > 0)
			{
				return false;
			}
			if (info.HasSeparateNorms())
			{
				return false;
			}
			if (!(info.dir == writer.Directory))
			{
				return false;
			}

			return info.GetUseCompoundFile() == useCompoundFile || internalNoCFSRatio < 1.0;
		}
Exemple #21
			/// <summary>
			/// Assertion helper: checks (in debug builds) that <paramref name="info"/>
			/// is found in the enclosing instance's <c>segmentInfos</c> and that the
			/// entry at that position is the very same object. Always returns
			/// <c>true</c> so it can be wrapped in an assert by callers.
			/// </summary>
			/// <param name="info">Segment expected to be part of the live set.</param>
			/// <returns>Always <c>true</c>.</returns>
			public virtual bool InfoIsLive(SegmentInfo info)
			{
				lock (this)
				{
					int position = Enclosing_Instance.segmentInfos.IndexOf(info);
					System.Diagnostics.Debug.Assert(position != -1);
					System.Diagnostics.Debug.Assert(Enclosing_Instance.segmentInfos[position] == info);
					return true;
				}
			}
Exemple #22
		/// <summary>
		/// Overwrites this instance's state with the state of <paramref name="src"/>.
		/// The fields copied are: name, docCount, dir, preLockless, delGen,
		/// docStoreOffset, docStoreIsCompoundFile, normGen (as a fresh array),
		/// isCompoundFile, hasSingleNormFile and delCount.
		/// </summary>
		/// <param name="src">Segment info to copy from.</param>
		internal void Reset(SegmentInfo src)
		{
			// Identity and location.
			name = src.name;
			dir = src.dir;
			docCount = src.docCount;

			// Format and generation flags.
			preLockless = src.preLockless;
			isCompoundFile = src.isCompoundFile;
			hasSingleNormFile = src.hasSingleNormFile;

			// Deletion bookkeeping.
			delGen = src.delGen;
			delCount = src.delCount;

			// Shared doc-store settings.
			docStoreOffset = src.docStoreOffset;
			docStoreIsCompoundFile = src.docStoreIsCompoundFile;

			// Norm generations get their own array so the two infos never share one.
			normGen = (src.normGen == null) ? null : (long[]) src.normGen.Clone();
		}
Exemple #23
		/// <summary>
		/// Convenience overload: records diagnostics for <paramref name="info"/>
		/// with the given <paramref name="source"/> and no extra details.
		/// </summary>
		/// <param name="info">Segment that receives the diagnostics.</param>
		/// <param name="source">Value forwarded as the diagnostics source.</param>
		private void  SetDiagnostics(SegmentInfo info, System.String source)
		{
			// Forward to the three-argument overload with a null details map.
			System.Collections.Generic.IDictionary<string, string> noDetails = null;
			SetDiagnostics(info, source, noDetails);
		}
Exemple #24
		/// <summary>
		/// Creates a copy of this SegmentInfo. Scalar fields are copied by
		/// value; <c>diagnostics</c>, <c>normGen</c> and <c>files</c> are copied
		/// into new containers when they are non-null, so the copy does not
		/// share those containers with this instance.
		/// </summary>
		/// <returns>The new <c>SegmentInfo</c>, typed as <c>object</c>.</returns>
		public System.Object Clone()
		{
			SegmentInfo si = new SegmentInfo(name, docCount, dir);
			si.isCompoundFile = isCompoundFile;
			si.delGen = delGen;
			si.delCount = delCount;
			si.hasProx = hasProx;
			si.preLockless = preLockless;
			si.hasSingleNormFile = hasSingleNormFile;

			// Copy diagnostics only after the null check. The previous code first built a
			// map from this.diagnostics unconditionally (dereferencing it before the check)
			// and then overwrote that map whenever diagnostics was non-null.
			// NOTE(review): when diagnostics is null the clone keeps whatever the
			// constructor assigned - confirm that is the desired value.
			if (this.diagnostics != null)
			{
				si.diagnostics = new System.Collections.Generic.Dictionary<string, string>(this.diagnostics);
			}

			if (normGen != null)
			{
				si.normGen = new long[normGen.Length];
				normGen.CopyTo(si.normGen, 0);
			}

			si.docStoreOffset = docStoreOffset;
			si.docStoreSegment = docStoreSegment;
			si.docStoreIsCompoundFile = docStoreIsCompoundFile;

			// The file-name list is copied element by element into a new list.
			if (this.files != null)
			{
				si.files = new System.Collections.Generic.List<string>(this.files);
			}

			return si;
		}
Exemple #25
			/// <summary> Returns a ref to a clone.  NOTE: this clone is not
			/// enrolled in the pool, so you should simply close()
			/// it when you're done (ie, do not call release()).
			/// </summary>
			/// <param name="info">Segment to obtain a reader for.</param>
			/// <param name="doOpenStores">Forwarded unchanged to <c>Get</c>.</param>
			/// <param name="termInfosIndexDivisor">Forwarded to <c>Get</c> as the terms index divisor.</param>
			/// <returns>The result of <c>sr.Clone(true)</c>, cast to <c>SegmentReader</c>.</returns>
			public virtual SegmentReader GetReadOnlyClone(SegmentInfo info, bool doOpenStores, int termInfosIndexDivisor)
				lock (this)
					// Obtain the reader with the default buffer size.
					SegmentReader sr = Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
						// NOTE(review): the deeper indentation suggests an enclosing block that is
						// not visible in this listing; no handling of `sr` after the clone is shown.
						return (SegmentReader) sr.Clone(true);
Exemple #26
		/// <summary> Test the term index.</summary>
		/// <param name="info">Segment under test (not referenced by the statements visible here).</param>
		/// <param name="reader">Reader supplying terms, positions and deletion information.</param>
		/// <returns>The status object; <c>status.error</c> is set when an exception was caught.</returns>
		private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
			var status = new Status.TermIndexStatus();
				// NOTE(review): this listing has no braces and some statements appear to be missing
				// (the `try` for the `catch` below, seeks on termPositions/myTermDocs, counter
				// increments). Comments describe only what is visible.
				if (infoStream != null)
					infoStream.Write("    test: terms, freq, prox...");
				TermEnum termEnum = reader.Terms();
				TermPositions termPositions = reader.TermPositions();
				// Used only to count up # deleted docs for this term
				var myTermDocs = new MySegmentTermDocs(reader);
				int maxDoc = reader.MaxDoc;
				while (termEnum.Next())
					Term term = termEnum.Term;
					int docFreq = termEnum.DocFreq();
					int lastDoc = - 1;
					int freq0 = 0;
					status.totFreq += docFreq;
					while (termPositions.Next())
						int doc = termPositions.Doc;
						int freq = termPositions.Freq;
						// Doc ids must be strictly increasing and below maxDoc.
						if (doc <= lastDoc)
							throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
						if (doc >= maxDoc)
							throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
						lastDoc = doc;
						// A posting must have a positive frequency.
						if (freq <= 0)
							throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
						int lastPos = - 1;
						status.totPos += freq;
						for (int j = 0; j < freq; j++)
							int pos = termPositions.NextPosition();
							// Positions below -1 are rejected; -1 itself is accepted.
							if (pos < - 1)
								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
							// Positions may repeat but must not decrease.
							if (pos < lastPos)
								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
						    lastPos = pos;
					// Now count how many deleted docs occurred in
					// this term:
					int delCount;
					if (reader.HasDeletions)
						while (myTermDocs.Next())
						delCount = myTermDocs.delCount;
						delCount = 0;
					// Docs seen plus docs deleted must equal the term's docFreq.
					// NOTE(review): no increment of freq0 is visible in this listing.
					if (freq0 + delCount != docFreq)
						throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
				Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
			catch (System.Exception e)
				// Report the failure and record it on the status instead of rethrowing.
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				// NOTE(review): the statement guarded by this check is not visible in this listing.
				if (infoStream != null)
			return status;
Exemple #27
			/// <summary> Obtain a SegmentReader from the readerPool.  The reader
			/// must be returned by calling <see cref="Release(SegmentReader)" />
			/// </summary>
			/// <seealso cref="Release(SegmentReader)">
			/// </seealso>
			/// <param name="info">Segment to look up in <c>readerMap</c>, or to open a reader for.
			/// </param>
			/// <param name="doOpenStores">Passed to <c>SegmentReader.Get</c> when a new reader is opened.
			/// </param>
			/// <param name="readBufferSize">Requested read buffer size; replaced by
			/// <c>BufferedIndexInput.BUFFER_SIZE</c> when the enclosing instance pools readers.
			/// </param>
			/// <param name="termsIndexDivisor">Terms index divisor; <c>-1</c> skips the
			/// terms-index check on an already existing reader.
			/// </param>
			/// <throws>  IOException </throws>
			public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor)
				lock (this)
					// NOTE(review): this listing has no braces and the bodies of several `if`
					// statements below are not visible; comments describe only what is shown.
					if (Enclosing_Instance.poolReaders)
						readBufferSize = BufferedIndexInput.BUFFER_SIZE;
					SegmentReader sr = readerMap[info];
					if (sr == null)
						// TODO: we may want to avoid doing this while
						// synchronized
						// Returns a ref, which we xfer to readerMap:
						sr = SegmentReader.Get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
                        if (info.dir == enclosingInstance.directory)
                            // Only pool if reader is not external
						if (doOpenStores)
						if (termsIndexDivisor != - 1 && !sr.TermsIndexLoaded())
							// If this reader was originally opened because we
							// needed to merge it, we didn't load the terms
							// index.  But now, if the caller wants the terms
							// index (eg because it's doing deletes, or an NRT
							// reader is being opened) we ask the reader to
							// load its terms index.
					// Return a ref to our caller
                    if (info.dir == enclosingInstance.directory)
                        // Only incRef if we pooled (reader is not external)
					return sr;
Exemple #28
		/// <summary> Test stored fields for a segment.</summary>
		/// <param name="info">Segment whose <c>docCount</c> bounds the document scan.</param>
		/// <param name="reader">Reader used to skip deleted documents and to load stored fields.</param>
		/// <param name="format">Format provider applied to the "OK" summary message.</param>
		/// <returns>The status object; <c>status.error</c> is set when an exception was caught.</returns>
		private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
			var status = new Status.StoredFieldStatus();
				// NOTE(review): this listing has no braces, and the `try` that pairs with the
				// `catch` below is not visible; confirm the structure against the full source.
				if (infoStream != null)
					infoStream.Write("    test: stored fields.......");
				// Scan stored fields for all documents
				for (int j = 0; j < info.docCount; ++j)
					if (!reader.IsDeleted(j))
						Document.Document doc = reader.Document(j);
						status.totFields += doc.GetFields().Count;
				// Validate docCount
				if (status.docCount != reader.NumDocs())
					// Report the expected count (reader.NumDocs()) next to the count actually
					// seen; printing status.docCount on both sides made the message useless.
					throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + status.docCount + " undeleted docs");
                Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
			catch (System.Exception e)
				// Report the failure and record it on the status instead of rethrowing.
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				// NOTE(review): the statement guarded by this check is not visible in this listing.
				if (infoStream != null)
			return status;
Exemple #29
		/// <summary> Obtain the number of deleted docs for a pooled reader.
		/// If the reader isn't being pooled, the segmentInfo's 
		/// delCount is returned.
		/// </summary>
		/// <param name="info">Segment to query.</param>
		/// <returns><c>reader.NumDeletedDocs</c> when the pool has a reader for
		/// <paramref name="info"/>, otherwise <c>info.GetDelCount()</c>.</returns>
		public virtual int NumDeletedDocs(SegmentInfo info)
			// Look up an existing pooled reader; null means none is pooled for this segment.
			SegmentReader reader = readerPool.GetIfExists(info);
				// NOTE(review): the deeper indentation suggests an enclosing block that is not
				// visible in this listing.
				if (reader != null)
					return reader.NumDeletedDocs;
					return info.GetDelCount();
				// NOTE(review): the statement guarded by this check is not visible in this listing.
				if (reader != null)
 public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)