示例#1
0
        /// <summary> Reads the latest segments file in the given directory and
        /// returns the user data it carries.</summary>
        /// <param name="directory">directory the segments file is read from</param>
        /// <returns>the value of <c>GetUserData()</c> on the freshly read <c>SegmentInfos</c></returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public static System.Collections.Generic.IDictionary <string, string> ReadCurrentUserData(Directory directory)
        {
            SegmentInfos latestInfos = new SegmentInfos();
            latestInfos.Read(directory);

            System.Collections.Generic.IDictionary<string, string> userData = latestInfos.GetUserData();
            return userData;
        }
示例#2
0
			/// <summary> Reads the named segments file from <c>directory</c> and wraps the
			/// result in a reader: a <c>ReadOnlyDirectoryReader</c> when <c>readOnly</c>
			/// is set, otherwise a <c>DirectoryReader</c>.</summary>
			/// <param name="segmentFileName">name of the segments file to load</param>
			/// <returns>the newly constructed reader</returns>
			public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
			{
				SegmentInfos loadedInfos = new SegmentInfos();
				loadedInfos.Read(directory, segmentFileName);

				if (!readOnly)
				{
					return new DirectoryReader(directory, loadedInfos, deletionPolicy, false, termInfosIndexDivisor);
				}

				return new ReadOnlyDirectoryReader(directory, loadedInfos, deletionPolicy, termInfosIndexDivisor);
			}
示例#3
0
        /// <summary> Reads the current version number from the segments file.</summary>
        /// <param name="directory">directory the segments file is read from</param>
        /// <returns>the <c>version</c> field of the <c>SegmentInfos</c> that was read</returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public static long ReadCurrentVersion(Directory directory)
        {
            // The segments file is deliberately read in full: that
            // guarantees it has been completely written, so a reader
            // still considers itself current when
            // IndexWriter.prepareCommit has run but commit has not.
            SegmentInfos current = new SegmentInfos();
            current.Read(directory);

            long currentVersion = current.version;
            return currentVersion;
        }
示例#4
0
		/// <summary> Initialize the deleter: find all previous commits in
		/// the Directory, incref the files they reference, call
		/// the policy to let it delete commits.  This will remove
		/// any files not referenced by any of the commits.
		/// </summary>
		/// <param name="directory">directory whose files are listed and whose commits are loaded</param>
		/// <param name="policy">deletion policy; receives the sorted commit list through <c>OnInit</c></param>
		/// <param name="segmentInfos">the caller's current segment infos; always protected via <c>Checkpoint</c></param>
		/// <param name="infoStream">optional diagnostics sink; when null no messages are emitted</param>
		/// <param name="docWriter">stored on the instance for later use</param>
		/// <param name="synced">stored on the instance for later use</param>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, System.Collections.Generic.Dictionary<string, string> synced)
		{
			this.docWriter = docWriter;
			this.infoStream = infoStream;
			this.synced = synced;

			if (infoStream != null)
			{
				Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
			}

			this.policy = policy;
			this.directory = directory;

			// First pass: walk the files and initialize our ref
			// counts:
			long currentGen = segmentInfos.GetGeneration();
			IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();

			System.String[] files = directory.ListAll();

			CommitPoint currentCommitPoint = null;

			for (int i = 0; i < files.Length; i++)
			{
				System.String fileName = files[i];

				if (!filter.Accept(null, fileName) || fileName.Equals(IndexFileNames.SEGMENTS_GEN))
				{
					continue;
				}

				// Add this file to refCounts with initial count 0:
				GetRefCount(fileName);

				// File names are not linguistic text, so compare ordinally
				// rather than with the current culture's rules.
				if (!fileName.StartsWith(IndexFileNames.SEGMENTS, System.StringComparison.Ordinal))
				{
					continue;
				}

				// This is a commit (segments or segments_N), and
				// it's valid (<= the max gen).  Load it, then
				// incref all files it refers to:
				if (infoStream != null)
				{
					Message("init: load commit \"" + fileName + "\"");
				}

				SegmentInfos sis = new SegmentInfos();
				try
				{
					sis.Read(directory, fileName);
				}
				catch (System.IO.FileNotFoundException)
				{
					// LUCENE-948: on NFS (and maybe others), if
					// you have writers switching back and forth
					// between machines, it's very likely that the
					// dir listing will be stale and will claim a
					// file segments_X exists when in fact it
					// doesn't.  So, we catch this and handle it
					// as if the file does not exist
					if (infoStream != null)
					{
						Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
					}
					sis = null;
				}
				catch (System.IO.IOException)
				{
					if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
					{
						// Bare rethrow keeps the original stack trace;
						// "throw e;" would restart it at this line.
						throw;
					}

					// Most likely we are opening an index that
					// has an aborted "future" commit, so suppress
					// exc in this case
					sis = null;
				}

				if (sis != null)
				{
					CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
					if (sis.GetGeneration() == segmentInfos.GetGeneration())
					{
						currentCommitPoint = commitPoint;
					}
					commits.Add(commitPoint);
					IncRef(sis, true);

					// Track the commit with the highest generation seen so far.
					if (lastSegmentInfos == null || sis.GetGeneration() > lastSegmentInfos.GetGeneration())
					{
						lastSegmentInfos = sis;
					}
				}
			}

			if (currentCommitPoint == null)
			{
				// We did not in fact see the segments_N file
				// corresponding to the segmentInfos that was passed
				// in.  Yet, it must exist, because our caller holds
				// the write lock.  This can happen when the directory
				// listing was stale (eg when index accessed via NFS
				// client with stale directory listing cache).  So we
				// try now to explicitly open this commit point:
				SegmentInfos sis = new SegmentInfos();
				try
				{
					sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
				}
				catch (System.IO.IOException)
				{
					throw new CorruptIndexException("failed to locate current segments_N file");
				}
				if (infoStream != null)
				{
					Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
				}
				currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
				commits.Add(currentCommitPoint);
				IncRef(sis, true);
			}

			// We keep commits list in sorted order (oldest to newest):
			commits.Sort();

			// Now delete anything with ref count at 0.  These are
			// presumably abandoned files eg due to crash of
			// IndexWriter.
			foreach (System.Collections.Generic.KeyValuePair<System.String, RefCount> entry in refCounts)
			{
				// The pair already carries the RefCount, so no second lookup is needed.
				System.String fileName = entry.Key;
				RefCount rc = entry.Value;
				if (0 == rc.count)
				{
					if (infoStream != null)
					{
						Message("init: removing unreferenced file \"" + fileName + "\"");
					}
					DeleteFile(fileName);
				}
			}

			// Finally, give policy a chance to remove things on
			// startup:
			policy.OnInit(commits);

			// Always protect the incoming segmentInfos since
			// sometime it may not be the most recent commit
			Checkpoint(segmentInfos, false);

			// Record whether the policy deleted the commit we started from.
			startingCommitDeleted = currentCommitPoint.IsDeleted();

			DeleteCommits();
		}
示例#5
0
		/// <summary> Reads the latest segments file in the given directory and
		/// returns the user data it carries.</summary>
		/// <param name="directory">directory the segments file is read from</param>
		/// <returns>the value of <c>GetUserData()</c> on the freshly read <c>SegmentInfos</c></returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public static System.Collections.Generic.IDictionary<string, string> ReadCurrentUserData(Directory directory)
		{
			SegmentInfos latestInfos = new SegmentInfos();
			latestInfos.Read(directory);

			System.Collections.Generic.IDictionary<string, string> userData = latestInfos.GetUserData();
			return userData;
		}
示例#6
0
		/// <summary> Reads the current version number from the segments file.</summary>
		/// <param name="directory">directory the segments file is read from</param>
		/// <returns>the <c>version</c> field of the <c>SegmentInfos</c> that was read</returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public static long ReadCurrentVersion(Directory directory)
		{
			// The segments file is deliberately read in full: that
			// guarantees it has been completely written, so a reader
			// still considers itself current when
			// IndexWriter.prepareCommit has run but commit has not.
			SegmentInfos current = new SegmentInfos();
			current.Read(directory);

			long currentVersion = current.version;
			return currentVersion;
		}
示例#7
0
		/// <summary>Returns a <see cref="Status"/> instance detailing
		/// the state of the index.
		/// </summary>
		/// <remarks>
		/// <p/>As this method checks every byte in the specified
		/// segments, on a large index it can take quite a long
		/// time to run.
		///
		/// <p/><b>WARNING</b>: make sure
		/// you only call this when the index is not opened by any
		/// writer.
		/// </remarks>
		/// <param name="onlySegments">list of specific segment names to check;
		/// when null every segment in the segments file is checked
		/// </param>
		/// <returns>the populated status. The method returns early, with the matching
		/// flag set, when the segments file cannot be read
		/// (<c>missingSegments</c>), cannot be opened (<c>cantOpenSegments</c>),
		/// has no readable format int (<c>missingSegmentVersion</c>) or has a format
		/// value below <c>SegmentInfos.CURRENT_FORMAT</c> (<c>toolOutOfDate</c>).
		/// </returns>
		public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
		{
			System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
			SegmentInfos sis = new SegmentInfos();
			Status result = new Status();
			result.dir = dir;
			try
			{
				sis.Read(dir);
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not read any segments file in directory");
				result.missingSegments = true;
				if (infoStream != null)
				{
					infoStream.WriteLine(t.StackTrace);
				}
				return result;
			}

			int numSegments = sis.Count;
			System.String segmentsFileName = sis.GetCurrentSegmentFileName();
			IndexInput input = null;
			try
			{
				input = dir.OpenInput(segmentsFileName);
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not open segments file in directory");
				if (infoStream != null)
				{
					infoStream.WriteLine(t.StackTrace);
				}
				result.cantOpenSegments = true;
				return result;
			}

			// The first int of the segments file is its format marker.
			int format = 0;
			try
			{
				format = input.ReadInt();
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not read segment file version in directory");
				if (infoStream != null)
				{
					infoStream.WriteLine(t.StackTrace);
				}
				result.missingSegmentVersion = true;
				return result;
			}
			finally
			{
				// Close the input whether or not the read succeeded.
				if (input != null)
				{
					input.Close();
				}
			}

			System.String sFormat = "";
			bool skip = false;

			// Single else-if chain: previously the FORMAT test was a separate
			// "if", so its label was overwritten by one of the last two branches
			// below (the "[Lucene 1.3 or prior]" fallback, or the "newer version" branch, which also aborted the check).
			if (format == SegmentInfos.FORMAT)
			{
				sFormat = "FORMAT [Lucene Pre-2.1]";
			}
			else if (format == SegmentInfos.FORMAT_LOCKLESS)
			{
				sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
			}
			else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
			{
				sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
			}
			else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
			{
				sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
			}
			else if (format == SegmentInfos.FORMAT_CHECKSUM)
			{
				sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
			}
			else if (format == SegmentInfos.FORMAT_DEL_COUNT)
			{
				sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
			}
			else if (format == SegmentInfos.FORMAT_HAS_PROX)
			{
				sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
			}
			else if (format == SegmentInfos.FORMAT_USER_DATA)
			{
				sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
			}
			else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
			{
				sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
			}
			else if (format < SegmentInfos.CURRENT_FORMAT)
			{
				// A value below CURRENT_FORMAT is reported as written by a newer
				// Lucene; the check is abandoned further down.
				sFormat = "int=" + format + " [newer version of Lucene than this tool]";
				skip = true;
			}
			else
			{
				sFormat = format + " [Lucene 1.3 or prior]";
			}

			result.segmentsFileName = segmentsFileName;
			result.numSegments = numSegments;
			result.segmentFormat = sFormat;
			result.userData = sis.GetUserData();
			System.String userDataString;
			if (sis.GetUserData().Count > 0)
			{
				userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
			}
			else
			{
				userDataString = "";
			}

			Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

			if (onlySegments != null)
			{
				result.partial = true;
				if (infoStream != null)
				{
					infoStream.Write("\nChecking only these segments:");
				}
				// Echo each requested segment name and record it in the result.
				foreach (System.Object segmentName in onlySegments)
				{
					if (infoStream != null)
					{
						infoStream.Write(" " + segmentName);
					}
					result.segmentsChecked.Add(segmentName);
				}
				Msg(":");
			}

			if (skip)
			{
				Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
				result.toolOutOfDate = true;
				return result;
			}

			// Start from an emptied clone; segments that pass every test are
			// added back at the bottom of the loop.
			result.newSegments = (SegmentInfos) sis.Clone();
			result.newSegments.Clear();

			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = sis.Info(i);
				if (onlySegments != null && !onlySegments.Contains(info.name))
				{
					continue;
				}
				Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
				result.segmentInfos.Add(segInfoStat);
				Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
				segInfoStat.name = info.name;
				segInfoStat.docCount = info.docCount;

				// Documents counted as lost if this segment fails; narrowed to the
				// live doc count once a reader has been opened.
				int toLoseDocCount = info.docCount;

				SegmentReader reader = null;

				try
				{
					Msg("    compound=" + info.GetUseCompoundFile());
					segInfoStat.compound = info.GetUseCompoundFile();
					Msg("    hasProx=" + info.GetHasProx());
					segInfoStat.hasProx = info.GetHasProx();
					Msg("    numFiles=" + info.Files().Count);
					segInfoStat.numFiles = info.Files().Count;
					Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
					segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
					System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
					segInfoStat.diagnostics = diagnostics;
					if (diagnostics.Count > 0)
					{
						Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
					}

					int docStoreOffset = info.GetDocStoreOffset();
					if (docStoreOffset != - 1)
					{
						Msg("    docStoreOffset=" + docStoreOffset);
						segInfoStat.docStoreOffset = docStoreOffset;
						Msg("    docStoreSegment=" + info.GetDocStoreSegment());
						segInfoStat.docStoreSegment = info.GetDocStoreSegment();
						Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
						segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
					}
					System.String delFileName = info.GetDelFileName();
					if (delFileName == null)
					{
						Msg("    no deletions");
						segInfoStat.hasDeletions = false;
					}
					else
					{
						Msg("    has deletions [delFileName=" + delFileName + "]");
						segInfoStat.hasDeletions = true;
						segInfoStat.deletionsFileName = delFileName;
					}
					if (infoStream != null)
					{
						infoStream.Write("    test: open reader.........");
					}
					reader = SegmentReader.Get(info);

					segInfoStat.openReaderPassed = true;

					int numDocs = reader.NumDocs();
					toLoseDocCount = numDocs;
					if (reader.HasDeletions())
					{
						// Cross-check the three sources of the deletion count.
						if (reader.deletedDocs.Count() != info.GetDelCount())
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
						}
						if (reader.deletedDocs.Count() > reader.MaxDoc())
						{
							throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
						}
						if (info.docCount - numDocs != info.GetDelCount())
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
						}
						segInfoStat.numDeleted = info.docCount - numDocs;
						Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
					}
					else
					{
						if (info.GetDelCount() != 0)
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
						}
						Msg("OK");
					}
					if (reader.MaxDoc() != info.docCount)
					{
						throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
					}

					// Test getFieldNames()
					if (infoStream != null)
					{
						infoStream.Write("    test: fields..............");
					}
					System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
					Msg("OK [" + fieldNames.Count + " fields]");
					segInfoStat.numFields = fieldNames.Count;

					// Test Field Norms
					segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

					// Test the Term Index
					segInfoStat.termIndexStatus = TestTermIndex(info, reader);

					// Test Stored Fields
					segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

					// Test Term Vectors
					segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

					// Rethrow the first exception we encountered
					//  This will cause stats for failed segments to be incremented properly
					if (segInfoStat.fieldNormStatus.error != null)
					{
						throw new System.SystemException("Field Norm test failed");
					}
					else if (segInfoStat.termIndexStatus.error != null)
					{
						throw new System.SystemException("Term Index test failed");
					}
					else if (segInfoStat.storedFieldStatus.error != null)
					{
						throw new System.SystemException("Stored Field test failed");
					}
					else if (segInfoStat.termVectorStatus.error != null)
					{
						throw new System.SystemException("Term Vector test failed");
					}

					Msg("");
				}
				catch (System.Exception t)
				{
					// Any failure marks the segment as bad; it is not added to newSegments.
					Msg("FAILED");
					System.String comment;
					comment = "fixIndex() would remove reference to this segment";
					Msg("    WARNING: " + comment + "; full exception:");
					if (infoStream != null)
					{
						infoStream.WriteLine(t.StackTrace);
					}
					Msg("");
					result.totLoseDocCount += toLoseDocCount;
					result.numBadSegments++;
					continue;
				}
				finally
				{
					if (reader != null)
					{
						reader.Close();
					}
				}

				// Keeper
				result.newSegments.Add(info.Clone());
			}

			if (0 == result.numBadSegments)
			{
				result.clean = true;
				Msg("No problems were detected with this index.\n");
			}
			else
			{
				Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
			}

			return result;
		}
示例#8
0
			/// <summary> Reads the named segments file from <c>directory</c> and passes the
			/// loaded <c>SegmentInfos</c> to the enclosing instance's <c>DoReopen</c>.</summary>
			/// <param name="segmentFileName">name of the segments file to load</param>
			/// <returns>whatever <c>DoReopen</c> returns</returns>
			public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
			{
				SegmentInfos loadedInfos = new SegmentInfos();
				loadedInfos.Read(directory, segmentFileName);

				System.Object reopened = Enclosing_Instance.DoReopen(loadedInfos, false, openReadOnly);
				return reopened;
			}
示例#9
0
 /// <summary> Reads the named segments file from <c>dir</c> and passes the
 /// loaded <c>SegmentInfos</c> to the enclosing instance's <c>DoReopen</c>.</summary>
 /// <param name="segmentFileName">name of the segments file to load</param>
 /// <returns>whatever <c>DoReopen</c> returns</returns>
 public override object DoBody(string segmentFileName)
 {
     SegmentInfos loadedInfos = new SegmentInfos();
     loadedInfos.Read(this.dir, segmentFileName);

     object reopened = enclosingInstance.DoReopen(loadedInfos, false, openReadOnly);
     return reopened;
 }
示例#10
0
		/// <summary> Lists the commits found in a directory: the latest commit first,
		/// followed by every readable <c>segments_N</c> file whose generation is lower
		/// than the latest one, in directory-listing order.</summary>
		/// <param name="dir">directory to scan for segments files</param>
		/// <returns>a collection of <c>ReaderCommit</c> instances</returns>
		/// <seealso cref="Mono.Lucene.Net.Index.IndexReader.listCommits">
		/// </seealso>
		public static new System.Collections.ICollection ListCommits(Directory dir)
		{
			System.String[] files = dir.ListAll();

			System.Collections.ArrayList commits = new System.Collections.ArrayList();

			SegmentInfos latest = new SegmentInfos();
			latest.Read(dir);
			long currentGen = latest.GetGeneration();

			commits.Add(new ReaderCommit(latest, dir));

			for (int i = 0; i < files.Length; i++)
			{
				System.String fileName = files[i];

				// File names are not linguistic text, so the prefix test is ordinal
				// rather than culture-sensitive. The generation is parsed only
				// after the name has been confirmed to be a segments_N file.
				bool isOlderCommit = fileName.StartsWith(IndexFileNames.SEGMENTS, System.StringComparison.Ordinal)
					&& !fileName.Equals(IndexFileNames.SEGMENTS_GEN)
					&& SegmentInfos.GenerationFromSegmentsFileName(fileName) < currentGen;
				if (!isOlderCommit)
				{
					continue;
				}

				SegmentInfos sis = new SegmentInfos();
				try
				{
					// IOException allowed to throw there, in case
					// segments_N is corrupt
					sis.Read(dir, fileName);
				}
				catch (System.IO.FileNotFoundException)
				{
					// LUCENE-948: on NFS (and maybe others), if
					// you have writers switching back and forth
					// between machines, it's very likely that the
					// dir listing will be stale and will claim a
					// file segments_X exists when in fact it
					// doesn't.  So, we catch this and handle it
					// as if the file does not exist
					continue;
				}

				commits.Add(new ReaderCommit(sis, dir));
			}

			return commits;
		}
示例#11
0
        /// <summary>Returns a {@link Status} instance detailing
        /// the state of the index.
        ///
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </param>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                System.Collections.IEnumerator it = onlySegments.GetEnumerator();
                while (it.MoveNext())
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + it.Current);
                    }
                }
                System.Collections.IEnumerator e = onlySegments.GetEnumerator();
                while (e.MoveNext() == true)
                {
                    result.segmentsChecked.Add(e.Current);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    System.String comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
示例#12
0
		/// <summary> Adds all segments from an array of indexes to this
		/// index, then calls <c>MaybeMerge()</c> and copies over any segments
		/// that still live in a different directory.
		/// 
		/// <p/>One use is parallel batch indexing: split a large document
		/// collection into sub-collections, index each of them on its own
		/// thread, process or machine, and finally combine the resulting
		/// sub-indexes with this method.
		/// 
		/// <p/><b>NOTE:</b> no write lock is acquired on the input
		/// Directories.  The caller must guarantee that none of them is
		/// changed (opened by a writer) while this method runs.
		/// 
		/// <p/><b>NOTE:</b> other threads that try to add or delete
		/// documents while this method runs are paused until it completes.
		/// 
		/// <p/>Exceptions are handled transactionally: no new segments_N
		/// file is committed until all indexes have been added.  If an
		/// Exception occurs (for example disk full), either none of the
		/// indexes was added or all of them were.<p/>
		/// 
		/// <p/>Temporary free space of up to 2X the sum of all input indexes
		/// (including the starting index) is required in the Directory.  If
		/// readers/searchers are open against the starting index, the
		/// requirement grows by the size of the starting index (see
		/// <see cref="Optimize()"/> for details).
		/// <p/>
		/// 
		/// <p/>After completion the index will be smaller than the sum of
		/// all input index sizes (including the starting index): possibly
		/// much smaller if there were many pending deletes, otherwise only
		/// slightly.<p/>
		/// 
		/// <p/>
		/// This index must not be one of the indexes being added.
		/// 
		/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
		/// you should immediately close the writer.  See <a
		/// href="#OOME">above</a> for details.<p/>
		/// 
		/// </summary>
		/// <param name="dirs">the indexes whose segments are added</param>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  AddIndexesNoOptimize(Directory[] dirs)
		{
			EnsureOpen();
			NoDupDirs(dirs);
			
			// Keep other threads from adding or deleting docs until we are done:
			docWriter.PauseAllThreads();
			
			try
			{
				if (infoStream != null)
				{
					Message("flush at addIndexesNoOptimize");
				}
				Flush(true, false, true);
				
				bool completed = false;
				StartTransaction(false);
				
				try
				{
					int addedDocCount = 0;
					lock (this)
					{
						EnsureOpen();
						
						foreach (Directory sourceDir in dirs)
						{
							// Adding our own directory is not allowed: its segments
							// may be deleted by a merge before they are added.
							if (directory == sourceDir)
							{
								throw new System.ArgumentException("Cannot add this index to itself");
							}
							
							// Load the segment infos of the source index:
							SegmentInfos sourceInfos = new SegmentInfos();
							sourceInfos.Read(sourceDir);
							
							for (int segIndex = 0; segIndex < sourceInfos.Count; segIndex++)
							{
								SegmentInfo segment = sourceInfos.Info(segIndex);
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(segment), "dup info dir=" + segment.dir + " name=" + segment.name);
								addedDocCount += segment.docCount;
								segmentInfos.Add(segment);
							}
						}
					}
					
					// Tell DocumentsWriter that the flushed doc count went up:
					docWriter.UpdateFlushedDocCount(addedDocCount);
					
					MaybeMerge();
					EnsureOpen();
					
					// Segments that still sit in a foreign directory after
					// merging are copied into our index here.  This has to
					// happen before the transaction finishes, otherwise the
					// index would be left unusable (inconsistent).
					ResolveExternalSegments();
					EnsureOpen();
					
					completed = true;
				}
				finally
				{
					if (!completed)
					{
						RollbackTransaction();
					}
					else
					{
						CommitTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexesNoOptimize");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
示例#13
0
		/// <summary> Adds all segments from an array of indexes to this
		/// index and then calls <c>Optimize()</c>.
		/// 
		/// <p/>Other threads are paused (via the DocumentsWriter) while this
		/// method runs.  The work is done inside a transaction: it is
		/// committed only if every step succeeded and rolled back otherwise.
		/// An OutOfMemoryException is handed to <c>HandleOOM</c>.
		/// </summary>
		/// <param name="dirs">the indexes whose segments are added</param>
		public virtual void  AddIndexes(Directory[] dirs)
		{
			EnsureOpen();
			NoDupDirs(dirs);
			
			// Keep other threads from adding or deleting docs until we are done:
			docWriter.PauseAllThreads();
			
			try
			{
				if (infoStream != null)
				{
					Message("flush at addIndexes");
				}
				Flush(true, false, true);
				
				bool completed = false;
				StartTransaction(false);
				
				try
				{
					int addedDocCount = 0;
					lock (this)
					{
						EnsureOpen();
						foreach (Directory sourceDir in dirs)
						{
							// Load the segment infos of the source index:
							SegmentInfos sourceInfos = new SegmentInfos();
							sourceInfos.Read(sourceDir);
							
							for (int segIndex = 0; segIndex < sourceInfos.Count; segIndex++)
							{
								SegmentInfo segment = sourceInfos.Info(segIndex);
								addedDocCount += segment.docCount;
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(segment));
								segmentInfos.Add(segment);
							}
						}
					}
					
					// Tell DocumentsWriter that the flushed doc count went up:
					docWriter.UpdateFlushedDocCount(addedDocCount);
					
					Optimize();
					
					completed = true;
				}
				finally
				{
					if (!completed)
					{
						RollbackTransaction();
					}
					else
					{
						CommitTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexes(Directory[])");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
示例#14
0
		/// <summary> Shared initialization for this writer: records the
		/// configuration, obtains the write lock, reads (or resets) the
		/// segments state and builds the DocumentsWriter and the
		/// IndexFileDeleter.  If anything fails after the write lock has
		/// been obtained, the lock is released and the <c>writeLock</c>
		/// field is cleared before the exception propagates.
		/// </summary>
		/// <param name="d">directory of the index; stored in <c>directory</c></param>
		/// <param name="a">analyzer; stored in <c>analyzer</c></param>
		/// <param name="create">if true, any leftover write lock is cleared and the
		/// writer starts with an empty segment list; if false, the existing
		/// segments file is read</param>
		/// <param name="closeDir">stored as-is in the <c>closeDir</c> field</param>
		/// <param name="deletionPolicy">policy handed to the IndexFileDeleter; when
		/// null a KeepOnlyLastCommitDeletionPolicy is used</param>
		/// <param name="autoCommit">stored in <c>autoCommit</c>; when true together
		/// with <c>create</c>, the emptied segments are committed immediately</param>
		/// <param name="maxFieldLength">stored and passed on to the DocumentsWriter</param>
		/// <param name="indexingChain">indexing chain for the DocumentsWriter; when
		/// null, <c>DocumentsWriter.DefaultIndexingChain</c> is used</param>
		/// <param name="commit">optional commit point to open (only consulted when
		/// <c>create</c> is false); may be null</param>
		/// <throws>  LockObtainFailedException if the write lock cannot be obtained within writeLockTimeout </throws>
		/// <throws>  ArgumentException if <c>commit</c> belongs to a different directory </throws>
		private void  Init(Directory d, Analyzer a, bool create, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
		{
			this.closeDir = closeDir;
			directory = d;
			analyzer = a;
			SetMessageID(defaultInfoStream);
			this.maxFieldLength = maxFieldLength;
			
			if (indexingChain == null)
				indexingChain = DocumentsWriter.DefaultIndexingChain;
			
			if (create)
			{
				// Clear the write lock in case it's leftover:
				directory.ClearLock(WRITE_LOCK_NAME);
			}
			
			Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME);
			if (!writeLock.Obtain(writeLockTimeout))
			// obtain write lock
			{
				throw new LockObtainFailedException("Index locked for write: " + writeLock);
			}
			this.writeLock = writeLock; // save it
			
			bool success = false;
			try
			{
				if (create)
				{
					// Try to read first.  This is to allow create
					// against an index that's currently open for
					// searching.  In this case we write the next
					// segments_N file with no segments:
					bool doCommit;
					try
					{
						segmentInfos.Read(directory);
						segmentInfos.Clear();
						doCommit = false;
					}
					catch (System.IO.IOException)
					{
						// Likely this means it's a fresh directory
						doCommit = true;
					}
					
					if (autoCommit || doCommit)
					{
						// Always commit if autoCommit=true, else only
						// commit if there is no segments file in this dir
						// already.
						segmentInfos.Commit(directory);
						SupportClass.CollectionsHelper.AddAllIfNotContains(synced, segmentInfos.Files(directory, true));
					}
					else
					{
						// Record that we have a change (zero out all
						// segments) pending:
						changeCount++;
					}
				}
				else
				{
					segmentInfos.Read(directory);
					
					if (commit != null)
					{
						// Swap out all segments, but, keep metadata in
						// SegmentInfos, like version & generation, to
						// preserve write-once.  This is important if
						// readers are open against the future commit
						// points.
						if (commit.GetDirectory() != directory)
							throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
						SegmentInfos oldInfos = new SegmentInfos();
						oldInfos.Read(directory, commit.GetSegmentsFileName());
						segmentInfos.Replace(oldInfos);
						changeCount++;
						if (infoStream != null)
							Message("init: loaded commit \"" + commit.GetSegmentsFileName() + "\"");
					}
					
					// We assume that this segments_N was previously
					// properly sync'd:
					SupportClass.CollectionsHelper.AddAllIfNotContains(synced, segmentInfos.Files(directory, true));
				}
				
				this.autoCommit = autoCommit;
				SetRollbackSegmentInfos(segmentInfos);
				
				docWriter = new DocumentsWriter(directory, this, indexingChain);
				docWriter.SetInfoStream(infoStream);
				docWriter.SetMaxFieldLength(maxFieldLength);
				
				// Default deleter (for backwards compatibility) is
				// KeepOnlyLastCommitDeleter:
				deleter = new IndexFileDeleter(directory, deletionPolicy == null?new KeepOnlyLastCommitDeletionPolicy():deletionPolicy, segmentInfos, infoStream, docWriter,synced);
				
				if (deleter.startingCommitDeleted)
				// Deletion policy deleted the "head" commit point.
				// We have to mark ourself as changed so that if we
				// are closed w/o any further changes we write a new
				// segments_N file.
					changeCount++;
				
				PushMaxBufferedDocs();
				
				if (infoStream != null)
				{
					Message("init: create=" + create);
					MessageState();
				}
				
				success = true;
			}
			finally
			{
				if (!success)
				{
					if (infoStream != null)
					{
						Message("init: hit exception on init; releasing write lock");
					}
					try
					{
						writeLock.Release();
					}
					catch (System.Exception)
					{
						// don't mask the original exception
					}
					// Clear the field, not the local of the same name: the
					// local goes out of scope right here, whereas the field
					// would otherwise keep referring to a released lock.
					this.writeLock = null;
				}
			}
		}
示例#15
0
        /// <summary> Initialize the deleter: find all previous commits in
        /// the Directory, incref the files they reference, call
        /// the policy to let it delete commits.  This will remove
        /// any files not referenced by any of the commits.
        /// </summary>
        /// <param name="directory">directory whose files are listed and reference-counted</param>
        /// <param name="policy">deletion policy; its <c>OnInit</c> is called with the sorted commit list</param>
        /// <param name="segmentInfos">the caller's current segment infos; always
        /// protected via <c>Checkpoint</c>, even if it is not the most recent commit</param>
        /// <param name="infoStream">optional diagnostics stream; null disables messages</param>
        /// <param name="docWriter">stored in the <c>docWriter</c> field</param>
        /// <param name="synced">stored in the <c>synced</c> field</param>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, System.Collections.Generic.Dictionary <string, string> synced)
        {
            this.docWriter  = docWriter;
            this.infoStream = infoStream;
            this.synced     = synced;

            if (infoStream != null)
            {
                Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
            }

            this.policy    = policy;
            this.directory = directory;

            // First pass: walk the files and initialize our ref
            // counts:
            long currentGen            = segmentInfos.GetGeneration();
            IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();

            System.String[] files = directory.ListAll();

            CommitPoint currentCommitPoint = null;

            for (int i = 0; i < files.Length; i++)
            {
                System.String fileName = files[i];

                if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
                {
                    // Add this file to refCounts with initial count 0:
                    GetRefCount(fileName);

                    // File names are not linguistic text, so compare ordinally
                    // rather than with the current culture.
                    if (fileName.StartsWith(IndexFileNames.SEGMENTS, System.StringComparison.Ordinal))
                    {
                        // This is a commit (segments or segments_N), and
                        // it's valid (<= the max gen).  Load it, then
                        // incref all files it refers to:
                        if (infoStream != null)
                        {
                            Message("init: load commit \"" + fileName + "\"");
                        }
                        SegmentInfos sis = new SegmentInfos();
                        try
                        {
                            sis.Read(directory, fileName);
                        }
                        catch (System.IO.FileNotFoundException)
                        {
                            // LUCENE-948: on NFS (and maybe others), if
                            // you have writers switching back and forth
                            // between machines, it's very likely that the
                            // dir listing will be stale and will claim a
                            // file segments_X exists when in fact it
                            // doesn't.  So, we catch this and handle it
                            // as if the file does not exist
                            if (infoStream != null)
                            {
                                Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                            }
                            sis = null;
                        }
                        catch (System.IO.IOException)
                        {
                            if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
                            {
                                // Rethrow with "throw;" so the original stack
                                // trace is preserved.
                                throw;
                            }
                            else
                            {
                                // Most likely we are opening an index that
                                // has an aborted "future" commit, so suppress
                                // exc in this case
                                sis = null;
                            }
                        }
                        if (sis != null)
                        {
                            CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
                            if (sis.GetGeneration() == segmentInfos.GetGeneration())
                            {
                                currentCommitPoint = commitPoint;
                            }
                            commits.Add(commitPoint);
                            IncRef(sis, true);

                            if (lastSegmentInfos == null || sis.GetGeneration() > lastSegmentInfos.GetGeneration())
                            {
                                lastSegmentInfos = sis;
                            }
                        }
                    }
                }
            }

            if (currentCommitPoint == null)
            {
                // We did not in fact see the segments_N file
                // corresponding to the segmentInfos that was passed
                // in.  Yet, it must exist, because our caller holds
                // the write lock.  This can happen when the directory
                // listing was stale (eg when index accessed via NFS
                // client with stale directory listing cache).  So we
                // try now to explicitly open this commit point:
                SegmentInfos sis = new SegmentInfos();
                try
                {
                    sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
                }
                catch (System.IO.IOException)
                {
                    throw new CorruptIndexException("failed to locate current segments_N file");
                }
                if (infoStream != null)
                {
                    Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
                }
                currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
                commits.Add(currentCommitPoint);
                IncRef(sis, true);
            }

            // We keep commits list in sorted order (oldest to newest):
            commits.Sort();

            // Now delete anything with ref count at 0.  These are
            // presumably abandoned files eg due to crash of
            // IndexWriter.
            foreach (System.Collections.Generic.KeyValuePair <System.String, RefCount> entry in refCounts)
            {
                if (0 == entry.Value.count)
                {
                    if (infoStream != null)
                    {
                        Message("init: removing unreferenced file \"" + entry.Key + "\"");
                    }
                    DeleteFile(entry.Key);
                }
            }

            // Finally, give policy a chance to remove things on
            // startup:
            policy.OnInit(commits);

            // Always protect the incoming segmentInfos since
            // sometime it may not be the most recent commit
            Checkpoint(segmentInfos, false);

            startingCommitDeleted = currentCommitPoint.IsDeleted();

            DeleteCommits();
        }