Example #1
        internal int docShift;         // total # deleted docs that were compacted by this merge

        public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
        {
            this.docMaps = docMaps;
            SegmentInfo firstSegment = merge.segments.Info(0);
            int         i            = 0;

            while (true)
            {
                SegmentInfo info = infos.Info(i);
                if (info.Equals(firstSegment))
                {
                    break;
                }
                minDocID += info.docCount;
                i++;
            }

            int numDocs = 0;

            for (int j = 0; j < docMaps.Length; i++, j++)
            {
                numDocs += infos.Info(i).docCount;
                System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
            }
            maxDocID = minDocID + numDocs;

            starts    = new int[docMaps.Length];
            newStarts = new int[docMaps.Length];

            starts[0]    = minDocID;
            newStarts[0] = minDocID;
            for (i = 1; i < docMaps.Length; i++)
            {
                int lastDocCount = merge.segments.Info(i - 1).docCount;
                starts[i]    = starts[i - 1] + lastDocCount;
                newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
            }
            docShift = numDocs - mergedDocCount;

            // There are rare cases when docShift is 0.  It happens
            // if you try to delete a docID that's out of bounds,
            // because the SegmentReader still allocates deletedDocs
            // and pretends it has deletions ... so we can't make
            // this assert here
            // assert docShift > 0;

            // Make sure it all adds up:
            System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
        }
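
The constructor above only prepares the bookkeeping: starts holds the first docID of each merged segment under the old numbering, newStarts the first docID under the new numbering, and each docMaps[i] translates within-segment ids around that segment's deletions. The standalone sketch below (a hypothetical helper, not part of Lucene.NET) shows how those three arrays combine to remap one old docID; it assumes the document being remapped was not itself deleted.

static int RemapSketch(int oldDocID, int[] starts, int[] newStarts, int[][] docMaps)
{
    // Find the merged segment that contains oldDocID; a linear scan keeps the sketch
    // simple (starts is sorted, so a binary search would also work).
    int seg = starts.Length - 1;
    while (seg > 0 && oldDocID < starts[seg])
    {
        seg--;
    }
    int within = oldDocID - starts[seg];
    // A null docMap means the segment had no deletions, so the in-segment offset is unchanged.
    int mapped = docMaps[seg] == null ? within : docMaps[seg][within];
    return newStarts[seg] + mapped;
}
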
Example #2
        protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
        {
            bool doCFS;

            if (!useCompoundFile)
            {
                doCFS = false;
            }
            else if (noCFSRatio == 1.0)
            {
                doCFS = true;
            }
            else
            {
                long totSize = 0;
                for (int i = 0; i < infos.Count; i++)
                {
                    totSize += Size(infos.Info(i));
                }
                long mergeSize = 0;
                for (int i = 0; i < infosToMerge.Count; i++)
                {
                    mergeSize += Size(infosToMerge.Info(i));
                }

                doCFS = mergeSize <= noCFSRatio * totSize;
            }

            return(new OneMerge(infosToMerge, doCFS));
        }
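
The compound-file decision above reduces to a single comparison: build a compound file only when the segments being merged amount to no more than noCFSRatio of the whole index. Below is a self-contained restatement of that rule, with hypothetical sizes passed in as plain longs.

static bool ShouldUseCompoundFile(long[] allSegmentSizes, long[] mergeSegmentSizes,
                                  bool useCompoundFile, double noCFSRatio)
{
    if (!useCompoundFile) return false;
    if (noCFSRatio == 1.0) return true;   // a ratio of 1.0 means "always use CFS"

    long totSize = 0;
    foreach (long size in allSegmentSizes) totSize += size;

    long mergeSize = 0;
    foreach (long size in mergeSegmentSizes) mergeSize += size;

    // Merges that are large relative to the index skip the compound file to avoid the copy cost.
    return mergeSize <= noCFSRatio * totSize;
}
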
Example #3
		internal int docShift; // total # deleted docs that were compacted by this merge
		
		public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
		{
			this.docMaps = docMaps;
			SegmentInfo firstSegment = merge.segments.Info(0);
			int i = 0;
			while (true)
			{
				SegmentInfo info = infos.Info(i);
				if (info.Equals(firstSegment))
					break;
				minDocID += info.docCount;
				i++;
			}
			
			int numDocs = 0;
			for (int j = 0; j < docMaps.Length; i++, j++)
			{
				numDocs += infos.Info(i).docCount;
				System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
			}
			maxDocID = minDocID + numDocs;
			
			starts = new int[docMaps.Length];
			newStarts = new int[docMaps.Length];
			
			starts[0] = minDocID;
			newStarts[0] = minDocID;
			for (i = 1; i < docMaps.Length; i++)
			{
				int lastDocCount = merge.segments.Info(i - 1).docCount;
				starts[i] = starts[i - 1] + lastDocCount;
				newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
			}
			docShift = numDocs - mergedDocCount;
			
			// There are rare cases when docShift is 0.  It happens
			// if you try to delete a docID that's out of bounds,
			// because the SegmentReader still allocates deletedDocs
			// and pretends it has deletions ... so we can't make
			// this assert here
			// assert docShift > 0;
			
			// Make sure it all adds up:
			System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
		}
Example #4
            public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos)
            {
                InitBlock(enclosingInstance);
                this.directory       = directory;
                this.commitsToDelete = commitsToDelete;
                userData             = segmentInfos.GetUserData();
                segmentsFileName     = segmentInfos.GetCurrentSegmentFileName();
                version     = segmentInfos.GetVersion();
                generation  = segmentInfos.GetGeneration();
                files       = segmentInfos.Files(directory, true);
                gen         = segmentInfos.GetGeneration();
                isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();

                System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
            }
Example #5
        private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            int         numSegments   = infos.Count;
            int         numToOptimize = 0;
            SegmentInfo optimizeInfo  = null;

            for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                if (segmentsToOptimize.Contains(info))
                {
                    numToOptimize++;
                    optimizeInfo = info;
                }
            }

            return(numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)));
        }
Example #6
			internal virtual System.String SegString(Directory dir)
			{
				System.Text.StringBuilder b = new System.Text.StringBuilder();
				int numSegments = segments.Count;
				for (int i = 0; i < numSegments; i++)
				{
					if (i > 0)
						b.Append(' ');
					b.Append(segments.Info(i).SegString(dir));
				}
				if (info != null)
					b.Append(" into ").Append(info.name);
				if (optimize)
					b.Append(" [optimize]");
				if (mergeDocStores)
				{
					b.Append(" [mergeDocStores]");
				}
				return b.ToString();
			}
Example #7
		/// <summary>Returns a {@link Status} instance detailing
		/// the state of the index.
		/// 
		/// <p/>As this method checks every byte in the specified
		/// segments, on a large index it can take quite a long
		/// time to run.
		/// 
		/// <p/><b>WARNING</b>: make sure
		/// you only call this when the index is not opened by any
		/// writer. 
		/// </summary>
		/// <param name="onlySegments">list of specific segment names to check</param>
		public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
		{
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
			SegmentInfos sis = new SegmentInfos();
			Status result = new Status();
			result.dir = dir;
			try
			{
				sis.Read(dir);
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not read any segments file in directory");
				result.missingSegments = true;
				if (infoStream != null)
					infoStream.WriteLine(t.StackTrace);
				return result;
			}
			
			int numSegments = sis.Count;
			System.String segmentsFileName = sis.GetCurrentSegmentFileName();
			IndexInput input = null;
			try
			{
				input = dir.OpenInput(segmentsFileName);
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not open segments file in directory");
				if (infoStream != null)
					infoStream.WriteLine(t.StackTrace);
				result.cantOpenSegments = true;
				return result;
			}
			int format = 0;
			try
			{
				format = input.ReadInt();
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not read segment file version in directory");
				if (infoStream != null)
					infoStream.WriteLine(t.StackTrace);
				result.missingSegmentVersion = true;
				return result;
			}
			finally
			{
				if (input != null)
					input.Close();
			}
			
			System.String sFormat = "";
			bool skip = false;
			
			if (format == SegmentInfos.FORMAT)
				sFormat = "FORMAT [Lucene Pre-2.1]";
			else if (format == SegmentInfos.FORMAT_LOCKLESS)
				sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
			else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
			else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
				sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
			else
			{
				if (format == SegmentInfos.FORMAT_CHECKSUM)
					sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
				else if (format == SegmentInfos.FORMAT_DEL_COUNT)
					sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
				else if (format == SegmentInfos.FORMAT_HAS_PROX)
					sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
				else if (format == SegmentInfos.FORMAT_USER_DATA)
					sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
				else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
					sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
				else if (format < SegmentInfos.CURRENT_FORMAT)
				{
					sFormat = "int=" + format + " [newer version of Lucene than this tool]";
					skip = true;
				}
				else
				{
					sFormat = format + " [Lucene 1.3 or prior]";
				}
			}
			
			result.segmentsFileName = segmentsFileName;
			result.numSegments = numSegments;
			result.segmentFormat = sFormat;
			result.userData = sis.GetUserData();
			System.String userDataString;
			if (sis.GetUserData().Count > 0)
			{
				userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
			}
			else
			{
				userDataString = "";
			}
			
			Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
			
			if (onlySegments != null)
			{
				result.partial = true;
				if (infoStream != null)
					infoStream.Write("\nChecking only these segments:");
				System.Collections.IEnumerator it = onlySegments.GetEnumerator();
				while (it.MoveNext())
				{
					if (infoStream != null)
					{
						infoStream.Write(" " + it.Current);
					}
				}
                System.Collections.IEnumerator e = onlySegments.GetEnumerator();
                while (e.MoveNext())
                {
                    result.segmentsChecked.Add(e.Current);
                }
                Msg(":");
			}
			
			if (skip)
			{
				Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
				result.toolOutOfDate = true;
				return result;
			}
			
			
			result.newSegments = (SegmentInfos) sis.Clone();
			result.newSegments.Clear();
			
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = sis.Info(i);
				if (onlySegments != null && !onlySegments.Contains(info.name))
					continue;
				Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
				result.segmentInfos.Add(segInfoStat);
				Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
				segInfoStat.name = info.name;
				segInfoStat.docCount = info.docCount;
				
				int toLoseDocCount = info.docCount;
				
				SegmentReader reader = null;
				
				try
				{
					Msg("    compound=" + info.GetUseCompoundFile());
					segInfoStat.compound = info.GetUseCompoundFile();
					Msg("    hasProx=" + info.GetHasProx());
					segInfoStat.hasProx = info.GetHasProx();
					Msg("    numFiles=" + info.Files().Count);
					segInfoStat.numFiles = info.Files().Count;
					Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
					segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
					segInfoStat.diagnostics = diagnostics;
					if (diagnostics.Count > 0)
					{
						Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
					}
					
					int docStoreOffset = info.GetDocStoreOffset();
					if (docStoreOffset != - 1)
					{
						Msg("    docStoreOffset=" + docStoreOffset);
						segInfoStat.docStoreOffset = docStoreOffset;
						Msg("    docStoreSegment=" + info.GetDocStoreSegment());
						segInfoStat.docStoreSegment = info.GetDocStoreSegment();
						Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
						segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
					}
					System.String delFileName = info.GetDelFileName();
					if (delFileName == null)
					{
						Msg("    no deletions");
						segInfoStat.hasDeletions = false;
					}
					else
					{
						Msg("    has deletions [delFileName=" + delFileName + "]");
						segInfoStat.hasDeletions = true;
						segInfoStat.deletionsFileName = delFileName;
					}
					if (infoStream != null)
						infoStream.Write("    test: open reader.........");
					reader = SegmentReader.Get(info);
					
					segInfoStat.openReaderPassed = true;
					
					int numDocs = reader.NumDocs();
					toLoseDocCount = numDocs;
					if (reader.HasDeletions())
					{
						if (reader.deletedDocs.Count() != info.GetDelCount())
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
						}
						if (reader.deletedDocs.Count() > reader.MaxDoc())
						{
							throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
						}
						if (info.docCount - numDocs != info.GetDelCount())
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
						}
						segInfoStat.numDeleted = info.docCount - numDocs;
						Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
					}
					else
					{
						if (info.GetDelCount() != 0)
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
						}
						Msg("OK");
					}
					if (reader.MaxDoc() != info.docCount)
						throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
					
					// Test getFieldNames()
					if (infoStream != null)
					{
						infoStream.Write("    test: fields..............");
					}
                    System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
					Msg("OK [" + fieldNames.Count + " fields]");
					segInfoStat.numFields = fieldNames.Count;
					
					// Test Field Norms
					segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
					
					// Test the Term Index
					segInfoStat.termIndexStatus = TestTermIndex(info, reader);
					
					// Test Stored Fields
					segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
					
					// Test Term Vectors
					segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
					
					// Rethrow the first exception we encountered
					//  This will cause stats for failed segments to be incremented properly
					if (segInfoStat.fieldNormStatus.error != null)
					{
						throw new System.SystemException("Field Norm test failed");
					}
					else if (segInfoStat.termIndexStatus.error != null)
					{
						throw new System.SystemException("Term Index test failed");
					}
					else if (segInfoStat.storedFieldStatus.error != null)
					{
						throw new System.SystemException("Stored Field test failed");
					}
					else if (segInfoStat.termVectorStatus.error != null)
					{
						throw new System.SystemException("Term Vector test failed");
					}
					
					Msg("");
				}
				catch (System.Exception t)
				{
					Msg("FAILED");
					System.String comment;
					comment = "fixIndex() would remove reference to this segment";
					Msg("    WARNING: " + comment + "; full exception:");
					if (infoStream != null)
						infoStream.WriteLine(t.StackTrace);
					Msg("");
					result.totLoseDocCount += toLoseDocCount;
					result.numBadSegments++;
					continue;
				}
				finally
				{
					if (reader != null)
						reader.Close();
				}
				
				// Keeper
				result.newSegments.Add(info.Clone());
			}
			
			if (0 == result.numBadSegments)
			{
				result.clean = true;
				Msg("No problems were detected with this index.\n");
			}
			else
				Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
			
			return result;
		}
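
As a rough usage sketch, the method above is normally driven through a CheckIndex instance; passing null for onlySegments checks every segment. This assumes the Status fields shown above (clean, numBadSegments, totLoseDocCount) are publicly readable, as in the Java original, and that the caller already has a Directory open elsewhere.

static void RunCheck(Lucene.Net.Store.Directory dir)
{
    Lucene.Net.Index.CheckIndex checker = new Lucene.Net.Index.CheckIndex(dir);
    // null = check all segments; pass an IList of segment names to restrict the check.
    Lucene.Net.Index.CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null);
    if (status.clean)
    {
        System.Console.WriteLine("index is clean");
    }
    else
    {
        System.Console.WriteLine(status.numBadSegments + " broken segment(s); " +
                                 status.totLoseDocCount + " document(s) would be lost");
    }
}
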
Example #8
		/// <summary>This constructor is only used for {@link #Reopen()} </summary>
		internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
		{
			this.directory = directory;
			this.readOnly = readOnly;
			this.segmentInfos = infos;
			this.termInfosIndexDivisor = termInfosIndexDivisor;
			if (!readOnly)
			{
				// We assume that this segments_N was previously
				// properly sync'd:
				SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true));
			}
			
			// we put the old SegmentReaders in a map, that allows us
			// to lookup a reader using its segment name
			System.Collections.IDictionary segmentReaders = new System.Collections.Hashtable();
			
			if (oldReaders != null)
			{
				// create a Map SegmentName->SegmentReader
				for (int i = 0; i < oldReaders.Length; i++)
				{
					segmentReaders[oldReaders[i].GetSegmentName()] = (System.Int32) i;
				}
			}
			
			SegmentReader[] newReaders = new SegmentReader[infos.Count];
			
			// remember which readers are shared between the old and the re-opened
			// DirectoryReader - we have to incRef those readers
			bool[] readerShared = new bool[infos.Count];
			
			for (int i = infos.Count - 1; i >= 0; i--)
			{
				// find SegmentReader for this segment
                int? oldReaderIndex = (int?)segmentReaders[infos.Info(i).name];
                if (oldReaderIndex.HasValue == false)
                {
                    // this is a new segment, no old SegmentReader can be reused
                    newReaders[i] = null;
                }
                else
                {
                    // there is an old reader for this segment - we'll try to reopen it
                    newReaders[i] = oldReaders[oldReaderIndex.Value];
                }
				
				bool success = false;
				try
				{
					SegmentReader newReader;
					if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile())
					{
						
						// We should never see a totally new segment during cloning
						System.Diagnostics.Debug.Assert(!doClone);
						
						// this is a new reader; in case we hit an exception we can close it safely
						newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
					}
					else
					{
						newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
					}
					if (newReader == newReaders[i])
					{
						// this reader will be shared between the old and the new one,
						// so we must incRef it
						readerShared[i] = true;
						newReader.IncRef();
					}
					else
					{
						readerShared[i] = false;
						newReaders[i] = newReader;
					}
					success = true;
				}
				finally
				{
					if (!success)
					{
						for (i++; i < infos.Count; i++)
						{
							if (newReaders[i] != null)
							{
								try
								{
									if (!readerShared[i])
									{
										// this is a new subReader that is not used by the old one,
										// we can close it
										newReaders[i].Close();
									}
									else
									{
										// this subReader is also used by the old reader, so instead
										// closing we must decRef it
										newReaders[i].DecRef();
									}
								}
								catch (System.IO.IOException ignore)
								{
									// keep going - we want to clean up as much as possible
								}
							}
						}
					}
				}
			}
			
			// initialize the readers to calculate maxDoc before we try to reuse the old normsCache
			Initialize(newReaders);
			
			// try to copy unchanged norms from the old normsCache to the new one
			if (oldNormsCache != null)
			{
				System.Collections.IEnumerator it = new System.Collections.Hashtable(oldNormsCache).GetEnumerator();
				while (it.MoveNext())
				{
					System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
					System.String field = (System.String) entry.Key;
					if (!HasNorms(field))
					{
						continue;
					}
					
					byte[] oldBytes = (byte[]) entry.Value;
					
					byte[] bytes = new byte[MaxDoc()];
					
					for (int i = 0; i < subReaders.Length; i++)
					{
                        int? oldReaderIndex = (int?)segmentReaders[subReaders[i].GetSegmentName()];

                        // this SegmentReader was not re-opened, we can copy all of its norms 
                        if (oldReaderIndex.HasValue &&
                             (oldReaders[oldReaderIndex.Value] == subReaders[i]
                               || oldReaders[oldReaderIndex.Value].norms[field] == subReaders[i].norms[field]))
                        {
                            // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                            // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                            // which is synchronized
                            Array.Copy(oldBytes, oldStarts[oldReaderIndex.Value], bytes, starts[i], starts[i + 1] - starts[i]);
                        }
                        else
                        {
                            subReaders[i].Norms(field, bytes, starts[i]);
                        }
					}
					
					normsCache[field] = bytes; // update cache
				}
			}
		}
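
Since this constructor is only reached through Reopen(), the caller-side idiom is the usual reopen-and-swap pattern sketched below; Reopen() returns the same instance when the index has not changed, so the old reader is closed only when a new one was actually produced.

static Lucene.Net.Index.IndexReader RefreshReader(Lucene.Net.Index.IndexReader reader)
{
    Lucene.Net.Index.IndexReader newReader = reader.Reopen();
    if (newReader != reader)
    {
        // The index changed and a fresh reader was built; release the old one.
        reader.Close();
    }
    return newReader;
}
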
Example #9
		// Used by near real-time search
		internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
		{
			this.directory = writer.GetDirectory();
			this.readOnly = true;
			segmentInfos = infos;
			segmentInfosStart = (SegmentInfos) infos.Clone();
			this.termInfosIndexDivisor = termInfosIndexDivisor;
			if (!readOnly)
			{
				// We assume that this segments_N was previously
				// properly sync'd:
				SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true));
			}
			
			// IndexWriter synchronizes externally before calling
			// us, which ensures infos will not change; so there's
			// no need to process segments in reverse order
			int numSegments = infos.Count;
			SegmentReader[] readers = new SegmentReader[numSegments];
			Directory dir = writer.GetDirectory();
			int upto = 0;
			
			for (int i = 0; i < numSegments; i++)
			{
				bool success = false;
				try
				{
					SegmentInfo info = infos.Info(i);
					if (info.dir == dir)
					{
						readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
					}
					success = true;
				}
				finally
				{
					if (!success)
					{
						// Close all readers we had opened:
						for (upto--; upto >= 0; upto--)
						{
							try
							{
								readers[upto].Close();
							}
							catch (System.Exception ignore)
							{
								// keep going - we want to clean up as much as possible
							}
						}
					}
				}
			}
			
			this.writer = writer;
			
			if (upto < readers.Length)
			{
				// This means some segments were in a foreign Directory
				SegmentReader[] newReaders = new SegmentReader[upto];
				Array.Copy(readers, 0, newReaders, 0, upto);
				readers = newReaders;
			}
			
			Initialize(readers);
		}
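
This near real-time path is normally reached through IndexWriter.GetReader() (assuming the 2.9-era API); the sketch below shows the shape of such a call. The returned reader sees documents the writer has flushed, even before a commit.

static int CountDocsNearRealTime(Lucene.Net.Index.IndexWriter writer)
{
    Lucene.Net.Index.IndexReader nrtReader = writer.GetReader();   // assumed 2.9 NRT entry point
    try
    {
        return nrtReader.NumDocs();
    }
    finally
    {
        nrtReader.Close();   // the writer stays open; only the reader is released
    }
}
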
Example #10
		/// <summary>Construct reading the named set of readers. </summary>
		internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
		{
			this.directory = directory;
			this.readOnly = readOnly;
			this.segmentInfos = sis;
			this.deletionPolicy = deletionPolicy;
			this.termInfosIndexDivisor = termInfosIndexDivisor;
			
			if (!readOnly)
			{
				// We assume that this segments_N was previously
				// properly sync'd:
				SupportClass.CollectionsHelper.AddAllIfNotContains(synced, sis.Files(directory, true));
			}
			
			// To reduce the chance of hitting FileNotFound
			// (and having to retry), we open segments in
			// reverse because IndexWriter merges & deletes
			// the newest segments first.
			
			SegmentReader[] readers = new SegmentReader[sis.Count];
			for (int i = sis.Count - 1; i >= 0; i--)
			{
				bool success = false;
				try
				{
					readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
					success = true;
				}
				finally
				{
					if (!success)
					{
						// Close all readers we had opened:
						for (i++; i < sis.Count; i++)
						{
							try
							{
								readers[i].Close();
							}
							catch (System.Exception ignore)
							{
								// keep going - we want to clean up as much as possible
							}
						}
					}
				}
			}
			
			Initialize(readers);
		}
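
Application code normally reaches this constructor through IndexReader.Open rather than building a DirectoryReader directly; a minimal sketch, assuming the two-argument Open(Directory, readOnly) overload from the 2.9 API:

static Lucene.Net.Index.IndexReader OpenReadOnly(Lucene.Net.Store.Directory dir)
{
    // true = read-only reader, which avoids taking the write lock on the directory.
    return Lucene.Net.Index.IndexReader.Open(dir, true);
}
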
Example #11
			internal ReaderCommit(SegmentInfos infos, Directory dir)
			{
				segmentsFileName = infos.GetCurrentSegmentFileName();
				this.dir = dir;
				userData = infos.GetUserData();
                files = infos.Files(dir, true);
				version = infos.GetVersion();
				generation = infos.GetGeneration();
				isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
			}
Example #12
		private void  SetRollbackSegmentInfos(SegmentInfos infos)
		{
			lock (this)
			{
				rollbackSegmentInfos = (SegmentInfos) infos.Clone();
				System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
				rollbackSegments = new System.Collections.Hashtable();
				int size = rollbackSegmentInfos.Count;
				for (int i = 0; i < size; i++)
					rollbackSegments[rollbackSegmentInfos.Info(i)] = (System.Int32) i;
			}
		}
Example #13
		private System.String SegString(SegmentInfos infos)
		{
			lock (this)
			{
				System.Text.StringBuilder buffer = new System.Text.StringBuilder();
				int count = infos.Count;
				for (int i = 0; i < count; i++)
				{
					if (i > 0)
					{
						buffer.Append(' ');
					}
					SegmentInfo info = infos.Info(i);
					buffer.Append(info.SegString(directory));
					if (info.dir != directory)
						buffer.Append("**");
				}
				return buffer.ToString();
			}
		}
Example #14
		public virtual void  AddIndexes(Directory[] dirs)
		{
			
			EnsureOpen();
			
			NoDupDirs(dirs);
			
			// Do not allow add docs or deletes while we are running:
			docWriter.PauseAllThreads();
			
			try
			{
				
				if (infoStream != null)
					Message("flush at addIndexes");
				Flush(true, false, true);
				
				bool success = false;
				
				StartTransaction(false);
				
				try
				{
					
					int docCount = 0;
					lock (this)
					{
						EnsureOpen();
						for (int i = 0; i < dirs.Length; i++)
						{
							SegmentInfos sis = new SegmentInfos(); // read infos from dir
							sis.Read(dirs[i]);
							for (int j = 0; j < sis.Count; j++)
							{
								SegmentInfo info = sis.Info(j);
								docCount += info.docCount;
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info));
								segmentInfos.Add(info); // add each info
							}
						}
					}
					
					// Notify DocumentsWriter that the flushed count just increased
					docWriter.UpdateFlushedDocCount(docCount);
					
					Optimize();
					
					success = true;
				}
				finally
				{
					if (success)
					{
						CommitTransaction();
					}
					else
					{
						RollbackTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexes(Directory[])");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
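
A hedged usage sketch for the method above: merge two already-built indexes into the one this writer owns. The writer, dirA and dirB are assumed to exist and to have been opened elsewhere.

static void MergeIndexes(Lucene.Net.Index.IndexWriter writer,
                         Lucene.Net.Store.Directory dirA,
                         Lucene.Net.Store.Directory dirB)
{
    // The source directories must not be open in another writer while this runs.
    writer.AddIndexes(new Lucene.Net.Store.Directory[] { dirA, dirB });
}
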
Example #15
        protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
        {
            bool doCFS;
            if (!useCompoundFile)
            {
                doCFS = false;
            }
            else if (noCFSRatio == 1.0)
            {
                doCFS = true;
            }
            else
            {
                long totSize = 0;
                for (int i = 0; i < infos.Count; i++)
                {
                    totSize += Size(infos.Info(i));
                }
                long mergeSize = 0;
                for (int i = 0; i < infosToMerge.Count; i++)
                {
                    mergeSize += Size(infosToMerge.Info(i));
                }

                doCFS = mergeSize <= noCFSRatio * totSize;
            }

            return new OneMerge(infosToMerge, doCFS);
        }
Example #16
		/// <summary> Finds merges necessary to expunge all deletes from the
		/// index.  We simply merge adjacent segments that have
		/// deletes, up to mergeFactor at a time.
		/// </summary>
		public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
		{
			int numSegments = segmentInfos.Count;
			
			if (Verbose())
				Message("findMergesToExpungeDeletes: " + numSegments + " segments");
			
			MergeSpecification spec = new MergeSpecification();
			int firstSegmentWithDeletions = - 1;
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = segmentInfos.Info(i);
				int delCount = writer.NumDeletedDocs(info);
				if (delCount > 0)
				{
					if (Verbose())
						Message("  segment " + info.name + " has deletions");
					if (firstSegmentWithDeletions == - 1)
						firstSegmentWithDeletions = i;
					else if (i - firstSegmentWithDeletions == mergeFactor)
					{
						// We've seen mergeFactor segments in a row with
						// deletions, so force a merge now:
						if (Verbose())
							Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
						firstSegmentWithDeletions = i;
					}
				}
				else if (firstSegmentWithDeletions != - 1)
				{
					// End of a sequence of segments with deletions, so,
					// merge those past segments even if it's fewer than
					// mergeFactor segments
					if (Verbose())
						Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
					firstSegmentWithDeletions = - 1;
				}
			}
			
			if (firstSegmentWithDeletions != - 1)
			{
				if (Verbose())
					Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
			}
			
			return spec;
		}
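
The grouping rule above is easier to see with plain integers: start a run at the first segment that has deletions, cut it when it reaches mergeFactor segments, and also cut it when a segment without deletions ends the run. A standalone sketch with hypothetical delete counts, returning [start, end) ranges:

static System.Collections.Generic.List<int[]> DeleteRuns(int[] delCounts, int mergeFactor)
{
    var runs = new System.Collections.Generic.List<int[]>();
    int first = -1;
    for (int i = 0; i < delCounts.Length; i++)
    {
        if (delCounts[i] > 0)
        {
            if (first == -1)
                first = i;
            else if (i - first == mergeFactor)
            {
                runs.Add(new int[] { first, i });   // full run of mergeFactor segments
                first = i;                          // current segment starts the next run
            }
        }
        else if (first != -1)
        {
            runs.Add(new int[] { first, i });       // run ended by a segment without deletions
            first = -1;
        }
    }
    if (first != -1)
        runs.Add(new int[] { first, delCounts.Length });
    return runs;
}
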
Example #17
		internal bool ApplyDeletes(SegmentInfos infos)
		{
			lock (this)
			{
				
				if (!HasDeletes())
					return false;
				
				if (infoStream != null)
					Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
				
				int infosEnd = infos.Count;
				
				int docStart = 0;
				bool any = false;
				for (int i = 0; i < infosEnd; i++)
				{
					
					// Make sure we never attempt to apply deletes to
					// segment in external dir
					System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
					
					SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
					try
					{
						any |= ApplyDeletes(reader, docStart);
						docStart += reader.MaxDoc();
					}
					finally
					{
						writer.readerPool.Release(reader);
					}
				}
				
				deletesFlushed.Clear();
				
				return any;
			}
		}
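
The running docStart offset above is just a prefix sum of each segment's maxDoc, so buffered index-wide delete docIDs can be translated to per-segment ids. A minimal standalone illustration with hypothetical maxDoc values:

static int[] SegmentDocStarts(int[] maxDocs)
{
    // starts[i] is the index-wide docID of the first document in segment i.
    int[] starts = new int[maxDocs.Length];
    int docStart = 0;
    for (int i = 0; i < maxDocs.Length; i++)
    {
        starts[i] = docStart;
        docStart += maxDocs[i];
    }
    return starts;
}
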
Example #18
        /// <summary>Returns a {@link Status} instance detailing
        /// the state of the index.
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check</param>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                System.Collections.IEnumerator it = onlySegments.GetEnumerator();
                while (it.MoveNext())
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + it.Current);
                    }
                }
                System.Collections.IEnumerator e = onlySegments.GetEnumerator();
                while (e.MoveNext())
                {
                    result.segmentsChecked.Add(e.Current);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    System.String comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
Example #19
		/// <summary> Merges all segments from an array of indexes into this
		/// index.
		/// 
		/// <p/>This may be used to parallelize batch indexing.  A large document
		/// collection can be broken into sub-collections.  Each sub-collection can be
		/// indexed in parallel, on a different thread, process or machine.  The
		/// complete index can then be created by merging sub-collection indexes
		/// with this method.
		/// 
		/// <p/><b>NOTE:</b> the index in each Directory must not be
		/// changed (opened by a writer) while this method is
		/// running.  This method does not acquire a write lock in
		/// each input Directory, so it is up to the caller to
		/// enforce this.
		/// 
		/// <p/><b>NOTE:</b> while this is running, any attempts to
		/// add or delete documents (with another thread) will be
		/// paused until this method completes.
		/// 
		/// <p/>This method is transactional in how Exceptions are
		/// handled: it does not commit a new segments_N file until
		/// all indexes are added.  This means if an Exception
		/// occurs (for example disk full), then either no indexes
		/// will have been added or they all will have been.<p/>
		/// 
		/// <p/>Note that this requires temporary free space in the
		/// Directory up to 2X the sum of all input indexes
		/// (including the starting index).  If readers/searchers
		/// are open against the starting index, then temporary
		/// free space required will be higher by the size of the
		/// starting index (see {@link #Optimize()} for details).
		/// <p/>
		/// 
		/// <p/>Once this completes, the final size of the index
		/// will be less than the sum of all input index sizes
		/// (including the starting index).  It could be quite a
		/// bit smaller (if there were many pending deletes) or
		/// just slightly smaller.<p/>
		/// 
		/// <p/>
		/// This requires this index not be among those to be added.
		/// 
		/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
		/// you should immediately close the writer.  See <a
		/// href="#OOME">above</a> for details.<p/>
		/// 
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  AddIndexesNoOptimize(Directory[] dirs)
		{
			
			EnsureOpen();
			
			NoDupDirs(dirs);
			
			// Do not allow add docs or deletes while we are running:
			docWriter.PauseAllThreads();
			
			try
			{
				if (infoStream != null)
					Message("flush at addIndexesNoOptimize");
				Flush(true, false, true);
				
				bool success = false;
				
				StartTransaction(false);
				
				try
				{
					
					int docCount = 0;
					lock (this)
					{
						EnsureOpen();
						
						for (int i = 0; i < dirs.Length; i++)
						{
							if (directory == dirs[i])
							{
								// cannot add this index: segments may be deleted in merge before added
								throw new System.ArgumentException("Cannot add this index to itself");
							}
							
							SegmentInfos sis = new SegmentInfos(); // read infos from dir
							sis.Read(dirs[i]);
							for (int j = 0; j < sis.Count; j++)
							{
								SegmentInfo info = sis.Info(j);
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
								docCount += info.docCount;
								segmentInfos.Add(info); // add each info
							}
						}
					}
					
					// Notify DocumentsWriter that the flushed count just increased
					docWriter.UpdateFlushedDocCount(docCount);
					
					MaybeMerge();
					
					EnsureOpen();
					
					// If after merging there remain segments in the index
					// that are in a different directory, just copy these
					// over into our index.  This is necessary (before
					// finishing the transaction) to avoid leaving the
					// index in an unusable (inconsistent) state.
					ResolveExternalSegments();
					
					EnsureOpen();
					
					success = true;
				}
				finally
				{
					if (success)
					{
						CommitTransaction();
					}
					else
					{
						RollbackTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexesNoOptimize");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
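
A hedged usage sketch for the method above: fold an externally built index into this writer without forcing an optimize. The Commit() call at the end is an assumption about the usual 2.9 workflow, making the added segments visible to newly opened readers.

static void AddExternalIndex(Lucene.Net.Index.IndexWriter writer,
                             Lucene.Net.Store.Directory external)
{
    writer.AddIndexesNoOptimize(new Lucene.Net.Store.Directory[] { external });
    writer.Commit();   // assumed: publish the added segments to new readers
}
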
Example #20
		private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
		{
			int numSegments = infos.Count;
			int numToOptimize = 0;
			SegmentInfo optimizeInfo = null;
			for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				if (segmentsToOptimize.Contains(info))
				{
					numToOptimize++;
					optimizeInfo = info;
				}
			}
			
			return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
		}
Example #21
        /// <summary>Checks if any merges are now necessary and returns a
        /// {@link MergePolicy.MergeSpecification} if so.  A merge
        /// is necessary when there are more than {@link
        /// #setMergeFactor} segments at a given level.  When
        /// multiple levels have too many segments, this method
        /// will return multiple merges, allowing the {@link
        /// MergeScheduler} to use concurrency.
        /// </summary>
        public override MergeSpecification FindMerges(SegmentInfos infos)
        {
            int numSegments = infos.Count;

            if (Verbose())
            {
                Message("findMerges: " + numSegments + " segments");
            }

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            float[] levels = new float[numSegments];
            float   norm   = (float)System.Math.Log(mergeFactor);

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                long        size = Size(info);

                // Floor tiny segments
                if (size < 1)
                {
                    size = 1;
                }
                levels[i] = (float)System.Math.Log(size) / norm;
            }

            float levelFloor;

            if (minMergeSize <= 0)
            {
                levelFloor = (float)0.0;
            }
            else
            {
                levelFloor = (float)(System.Math.Log(minMergeSize) / norm);
            }

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
            // LEVEL_LOG_SPAN of the max size, or, who has such as
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int start = 0;

            while (start < numSegments)
            {
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start];
                for (int i = 1 + start; i < numSegments; i++)
                {
                    float level = levels[i];
                    if (level > maxLevel)
                    {
                        maxLevel = level;
                    }
                }

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                if (maxLevel < levelFloor)
                {
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                }
                else
                {
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    {
                        levelBottom = levelFloor;
                    }
                }

                int upto = numSegments - 1;
                while (upto >= start)
                {
                    if (levels[upto] >= levelBottom)
                    {
                        break;
                    }
                    upto--;
                }
                if (Verbose())
                {
                    Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
                }

                // Finally, record all merges that are viable at this level:
                int end = start + mergeFactor;
                while (end <= 1 + upto)
                {
                    bool anyTooLarge = false;
                    for (int i = start; i < end; i++)
                    {
                        SegmentInfo info = infos.Info(i);
                        anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
                    }

                    if (!anyTooLarge)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        if (Verbose())
                        {
                            Message("    " + start + " to " + end + ": add this merge");
                        }
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
                    }
                    else if (Verbose())
                    {
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                    }

                    start = end;
                    end   = start + mergeFactor;
                }

                start = 1 + upto;
            }

            return(spec);
        }
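A note on the level computation above: each segment's size is mapped to log(size) / log(mergeFactor), and the top level reaches LEVEL_LOG_SPAN below the largest value. The following standalone sketch is not Lucene.NET code; the segment sizes, the merge factor of 10, and the 0.75 span value are illustrative assumptions, chosen only to show how the quantization buckets a handful of segments.

using System;

class LevelQuantizationSketch
{
    static void Main()
    {
        // Hypothetical segment sizes in bytes and a merge factor of 10.
        long[] segmentSizes = { 50_000_000, 40_000_000, 3_000_000, 2_500_000, 100_000 };
        int mergeFactor = 10;
        double levelLogSpan = 0.75;                     // assumed value of LEVEL_LOG_SPAN

        double norm = Math.Log(mergeFactor);
        double[] levels = new double[segmentSizes.Length];
        for (int i = 0; i < segmentSizes.Length; i++)
        {
            long size = Math.Max(segmentSizes[i], 1);   // floor tiny segments
            levels[i] = Math.Log(size) / norm;          // log base mergeFactor
        }

        // The top level spans LEVEL_LOG_SPAN below the largest segment's level.
        double maxLevel = levels[0];
        for (int i = 1; i < levels.Length; i++)
            maxLevel = Math.Max(maxLevel, levels[i]);
        double levelBottom = maxLevel - levelLogSpan;

        for (int i = 0; i < levels.Length; i++)
            Console.WriteLine("segment " + i + ": level " + levels[i].ToString("F2") +
                              (levels[i] >= levelBottom ? "  (top level)" : ""));
    }
}

With these made-up sizes, only the two largest segments land in the top level; the rest wait for a later pass of the outer loop.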
Example No. 22
		/// <summary>Returns the merges necessary to optimize the index.
		/// This merge policy defines "optimized" to mean only one
		/// segment in the index, where that segment has no
		/// deletions pending nor separate norms, and it is in
		/// compound file format if the current useCompoundFile
		/// setting is true.  This method returns multiple merges
		/// (mergeFactor at a time) so the {@link MergeScheduler}
		/// in use may make use of concurrency. 
		/// </summary>
		public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
		{
			MergeSpecification spec;
			
			System.Diagnostics.Debug.Assert(maxNumSegments > 0);
			
			if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
			{
				
				// Find the newest (rightmost) segment that needs to
				// be optimized (other segments may have been flushed
				// since optimize started):
				int last = infos.Count;
				while (last > 0)
				{
					SegmentInfo info = infos.Info(--last);
					if (segmentsToOptimize.Contains(info))
					{
						last++;
						break;
					}
				}
				
				if (last > 0)
				{
					
					spec = new MergeSpecification();
					
					// First, enroll all "full" merges (size
					// mergeFactor) to potentially be run concurrently:
					while (last - maxNumSegments + 1 >= mergeFactor)
					{
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
						last -= mergeFactor;
					}
					
					// Only if there are no full merges pending do we
					// add a final partial (< mergeFactor segments) merge:
					if (0 == spec.merges.Count)
					{
						if (maxNumSegments == 1)
						{
							
							// Since we must optimize down to 1 segment, the
							// choice is simple:
							if (last > 1 || !IsOptimized(infos.Info(0)))
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
						}
						else if (last > maxNumSegments)
						{
							
							// Take care to pick a partial merge that is
							// least cost, but does not make the index too
							// lopsided.  If we always just picked the
							// partial tail then we could produce a highly
							// lopsided index over time:
							
							// We must merge this many segments to leave
							// maxNumSegments in the index (from when
							// optimize was first kicked off):
							int finalMergeSize = last - maxNumSegments + 1;
							
							// Consider all possible starting points:
							long bestSize = 0;
							int bestStart = 0;
							
							for (int i = 0; i < last - finalMergeSize + 1; i++)
							{
								long sumSize = 0;
								for (int j = 0; j < finalMergeSize; j++)
									sumSize += Size(infos.Info(j + i));
								if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
								{
									bestStart = i;
									bestSize = sumSize;
								}
							}

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
						}
					}
				}
				else
					spec = null;
			}
			else
				spec = null;
			
			return spec;
		}
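The partial-merge selection at the end of FindMergesForOptimize scans every contiguous window of finalMergeSize segments and keeps the cheapest one, while rejecting any window whose total size is at least twice the segment immediately to its left. Below is a minimal standalone sketch of that selection; the class, method name, and segment sizes are made up for illustration and are not part of the Lucene.NET API.

using System;

class PartialMergeWindowSketch
{
    // Pick the cheapest contiguous window of finalMergeSize segments, but
    // reject a window whose total size is at least twice the segment just to
    // its left, so the resulting index does not become lopsided.
    static int PickBestStart(long[] sizes, int finalMergeSize)
    {
        long bestSize = 0;
        int bestStart = 0;
        for (int i = 0; i + finalMergeSize <= sizes.Length; i++)
        {
            long sumSize = 0;
            for (int j = 0; j < finalMergeSize; j++)
                sumSize += sizes[i + j];
            if (i == 0 || (sumSize < 2 * sizes[i - 1] && sumSize < bestSize))
            {
                bestStart = i;
                bestSize = sumSize;
            }
        }
        return bestStart;
    }

    static void Main()
    {
        long[] sizes = { 90, 20, 15, 12, 10, 8 };      // hypothetical segment sizes
        Console.WriteLine(PickBestStart(sizes, 3));    // prints 2: the [15, 12, 10] window wins
    }
}

The window starting at index 3 would be cheaper (12 + 10 + 8 = 30), but it is exactly twice the segment to its left (15), so the lopsided guard rejects it.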
Example No. 23
			/// <summary>Forcefully clear changes for the specified segments,
			/// and remove them from the pool.  This is called on a successful merge. 
			/// </summary>
			internal virtual void  Clear(SegmentInfos infos)
			{
				lock (this)
				{
					if (infos == null)
					{
                        System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator();
						while (iter.MoveNext())
						{
							System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current;
							((SegmentReader) ent.Value).hasChanges = false;
						}
					}
					else
					{
						int numSegments = infos.Count;
						for (int i = 0; i < numSegments; i++)
						{
							SegmentInfo info = infos.Info(i);
							if (readerMap.Contains(info))
							{
								((SegmentReader) readerMap[info]).hasChanges = false;
							}
						}
					}
				}
			}
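Note that when infos is null the code enumerates a copy of readerMap (new Hashtable(readerMap)) rather than the live table, so clearing flags cannot invalidate the enumerator. Here is a small standalone sketch of the same snapshot-then-iterate pattern using a modern Dictionary; the class and field names are illustrative, not taken from Lucene.NET.

using System.Collections.Generic;
using System.Linq;

class ReaderPoolSketch
{
    private readonly object sync = new object();
    private readonly Dictionary<string, bool> hasChanges = new Dictionary<string, bool>();

    // Clear the "has changes" flag for every tracked reader.  Iterating a
    // snapshot of the keys (ToList) keeps the loop safe even though we write
    // back into the same dictionary inside it.
    public void ClearAll()
    {
        lock (sync)
        {
            foreach (var key in hasChanges.Keys.ToList())
                hasChanges[key] = false;
        }
    }
}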
Example No. 24
		/// <summary>Checks if any merges are now necessary and returns a
		/// {@link MergePolicy.MergeSpecification} if so.  A merge
		/// is necessary when there are more than {@link
		/// #setMergeFactor} segments at a given level.  When
		/// multiple levels have too many segments, this method
		/// will return multiple merges, allowing the {@link
		/// MergeScheduler} to use concurrency. 
		/// </summary>
		public override MergeSpecification FindMerges(SegmentInfos infos)
		{
			
			int numSegments = infos.Count;
			if (Verbose())
				Message("findMerges: " + numSegments + " segments");
			
			// Compute levels, which is just log (base mergeFactor)
			// of the size of each segment
			float[] levels = new float[numSegments];
			float norm = (float) System.Math.Log(mergeFactor);
			
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				long size = Size(info);
				
				// Floor tiny segments
				if (size < 1)
					size = 1;
				levels[i] = (float) System.Math.Log(size) / norm;
			}
			
			float levelFloor;
			if (minMergeSize <= 0)
				levelFloor = (float) 0.0;
			else
			{
				levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
			}
			
			// Now, we quantize the log values into levels.  The
			// first level is any segment whose log size is within
			// LEVEL_LOG_SPAN of the max size, or, which has such a
			// segment "to the right".  Then, we find the max of all
			// other segments and use that to define the next level
			// segment, etc.
			
			MergeSpecification spec = null;
			
			int start = 0;
			while (start < numSegments)
			{
				
				// Find max level of all segments not already
				// quantized.
				float maxLevel = levels[start];
				for (int i = 1 + start; i < numSegments; i++)
				{
					float level = levels[i];
					if (level > maxLevel)
						maxLevel = level;
				}
				
				// Now search backwards for the rightmost segment that
				// falls into this level:
				float levelBottom;
				if (maxLevel < levelFloor)
					// All remaining segments fall into the min level
					levelBottom = -1.0F;
				else
				{
					levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
					
					// Force a boundary at the level floor
					if (levelBottom < levelFloor && maxLevel >= levelFloor)
						levelBottom = levelFloor;
				}
				
				int upto = numSegments - 1;
				while (upto >= start)
				{
					if (levels[upto] >= levelBottom)
					{
						break;
					}
					upto--;
				}
				if (Verbose())
					Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
				
				// Finally, record all merges that are viable at this level:
				int end = start + mergeFactor;
				while (end <= 1 + upto)
				{
					bool anyTooLarge = false;
					for (int i = start; i < end; i++)
					{
						SegmentInfo info = infos.Info(i);
						anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
					}
					
					if (!anyTooLarge)
					{
						if (spec == null)
							spec = new MergeSpecification();
						if (Verbose())
							Message("    " + start + " to " + end + ": add this merge");
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
					}
					else if (Verbose())
						Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
					
					start = end;
					end = start + mergeFactor;
				}
				
				start = 1 + upto;
			}
			
			return spec;
		}
Example No. 25
        /// <summary>Returns the merges necessary to optimize the index.
        /// This merge policy defines "optimized" to mean only one
        /// segment in the index, where that segment has no
        /// deletions pending nor separate norms, and it is in
        /// compound file format if the current useCompoundFile
        /// setting is true.  This method returns multiple merges
        /// (mergeFactor at a time) so the {@link MergeScheduler}
        /// in use may make use of concurrency.
        /// </summary>
        public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            MergeSpecification spec;

            System.Diagnostics.Debug.Assert(maxNumSegments > 0);

            if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
            {
                // Find the newest (rightmost) segment that needs to
                // be optimized (other segments may have been flushed
                // since optimize started):
                int last = infos.Count;
                while (last > 0)
                {
                    SegmentInfo info = infos.Info(--last);
                    if (segmentsToOptimize.Contains(info))
                    {
                        last++;
                        break;
                    }
                }

                if (last > 0)
                {
                    spec = new MergeSpecification();

                    // First, enroll all "full" merges (size
                    // mergeFactor) to potentially be run concurrently:
                    while (last - maxNumSegments + 1 >= mergeFactor)
                    {
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
                        last -= mergeFactor;
                    }

                    // Only if there are no full merges pending do we
                    // add a final partial (< mergeFactor segments) merge:
                    if (0 == spec.merges.Count)
                    {
                        if (maxNumSegments == 1)
                        {
                            // Since we must optimize down to 1 segment, the
                            // choice is simple:
                            if (last > 1 || !IsOptimized(infos.Info(0)))
                            {
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
                            }
                        }
                        else if (last > maxNumSegments)
                        {
                            // Take care to pick a partial merge that is
                            // least cost, but does not make the index too
                            // lopsided.  If we always just picked the
                            // partial tail then we could produce a highly
                            // lopsided index over time:

                            // We must merge this many segments to leave
                            // maxNumSegments in the index (from when
                            // optimize was first kicked off):
                            int finalMergeSize = last - maxNumSegments + 1;

                            // Consider all possible starting points:
                            long bestSize  = 0;
                            int  bestStart = 0;

                            for (int i = 0; i < last - finalMergeSize + 1; i++)
                            {
                                long sumSize = 0;
                                for (int j = 0; j < finalMergeSize; j++)
                                {
                                    sumSize += Size(infos.Info(j + i));
                                }
                                if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                                {
                                    bestStart = i;
                                    bestSize  = sumSize;
                                }
                            }

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
                        }
                    }
                }
                else
                {
                    spec = null;
                }
            }
            else
            {
                spec = null;
            }

            return(spec);
        }
Example No. 26
			public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos)
			{
				InitBlock(enclosingInstance);
				this.directory = directory;
				this.commitsToDelete = commitsToDelete;
				userData = segmentInfos.GetUserData();
				segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
				version = segmentInfos.GetVersion();
				generation = segmentInfos.GetGeneration();
                files = segmentInfos.Files(directory, true);
				gen = segmentInfos.GetGeneration();
				isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();
				
				System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
			}
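For reference, the constructor above snapshots everything needed to describe a commit independently of later index changes. A hypothetical value object capturing the same fields might look like the sketch below; the type and field names are illustrative only and are not part of the Lucene.NET API.

using System.Collections.Generic;

// Mirrors what the CommitPoint constructor records from SegmentInfos at
// commit time.  All names here are made up for illustration.
class CommitSnapshot
{
    public string SegmentsFileName;            // e.g. "segments_4"
    public long Version;                       // index version at this commit
    public long Generation;                    // generation of the segments file
    public IList<string> Files;                // every file referenced by the commit
    public bool IsOptimized;                   // single segment with no deletions
    public IDictionary<string, string> UserData;
}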
Example No. 27
        /// <summary> Finds merges necessary to expunge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
        {
            int numSegments = segmentInfos.Count;

            if (Verbose())
            {
                Message("findMergesToExpungeDeletes: " + numSegments + " segments");
            }

            MergeSpecification spec       = new MergeSpecification();
            int firstSegmentWithDeletions = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info     = segmentInfos.Info(i);
                int         delCount = writer.NumDeletedDocs(info);
                if (delCount > 0)
                {
                    if (Verbose())
                    {
                        Message("  segment " + info.name + " has deletions");
                    }
                    if (firstSegmentWithDeletions == -1)
                    {
                        firstSegmentWithDeletions = i;
                    }
                    else if (i - firstSegmentWithDeletions == mergeFactor)
                    {
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        if (Verbose())
                        {
                            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        }
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                        firstSegmentWithDeletions = i;
                    }
                }
                else if (firstSegmentWithDeletions != -1)
                {
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    if (Verbose())
                    {
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    }
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                    firstSegmentWithDeletions = -1;
                }
            }

            if (firstSegmentWithDeletions != -1)
            {
                if (Verbose())
                {
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                }
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
            }

            return(spec);
        }
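The loop above groups adjacent segments that carry deletions into runs of at most mergeFactor segments, closing a run early whenever a deletion-free segment is hit and flushing any trailing run at the end. The standalone sketch below mirrors that grouping; the delCounts array, class, and method names are hypothetical and only the range arithmetic matches the method above.

using System;
using System.Collections.Generic;

class ExpungeDeletesSketch
{
    // Group adjacent segments that have deletions into merge ranges of at
    // most mergeFactor segments.  Ranges are [start, end) over segment indexes.
    static List<(int start, int end)> GroupRuns(int[] delCounts, int mergeFactor)
    {
        var ranges = new List<(int start, int end)>();
        int first = -1;
        for (int i = 0; i < delCounts.Length; i++)
        {
            if (delCounts[i] > 0)
            {
                if (first == -1)
                    first = i;
                else if (i - first == mergeFactor)
                {
                    ranges.Add((first, i));    // run reached mergeFactor: cut it here
                    first = i;
                }
            }
            else if (first != -1)
            {
                ranges.Add((first, i));        // deletion-free segment ends the run
                first = -1;
            }
        }
        if (first != -1)
            ranges.Add((first, delCounts.Length));
        return ranges;
    }

    static void Main()
    {
        int[] delCounts = { 0, 3, 1, 0, 2, 2, 2, 2 };  // hypothetical deleted-doc counts
        foreach (var (start, end) in GroupRuns(delCounts, 3))
            Console.WriteLine("merge segments [" + start + ", " + end + ")");
        // Prints [1, 3), [4, 7), [7, 8): the second run is cut at mergeFactor = 3.
    }
}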