Exemplo n.º 1
0
        /// <summary>
        /// Flushes this reader's pending changes for the segment: rewrites the
        /// deletions file when deletions are dirty, drops the deletions generation
        /// when an undelete-all is pending, rewrites every dirty norm, and finally
        /// resets the three "dirty" flags.
        /// </summary>
        protected internal override void DoCommit()
        {
            if (deletedDocsDirty)
            {
                // The current deletions file (if any) is only queued for deletion
                // here; it is not actually removed until the new segments file
                // has been written successfully.
                System.String staleDelFile = si.GetDelFileName();
                if (staleDelFile != null)
                {
                    deleter.AddPendingFile(staleDelFile);
                }

                si.AdvanceDelGen();

                // Writing straight to the final name (instead of a .tmp file
                // followed by a rename) is fine because the file is not live
                // until the segments file is written.
                deletedDocs.Write(Directory(), si.GetDelFileName());
            }

            if (undeleteAll && si.HasDeletions())
            {
                // Same deferred-deletion rule as above applies to this file.
                System.String obsoleteDelFile = si.GetDelFileName();
                if (obsoleteDelFile != null)
                {
                    deleter.AddPendingFile(obsoleteDelFile);
                }

                si.ClearDelGen();
            }

            if (normsDirty)
            {
                // Re-write only the norms that were actually modified.
                si.SetNumFields(fieldInfos.Size());
                foreach (Norm candidate in norms.Values)
                {
                    if (!candidate.dirty)
                    {
                        continue;
                    }
                    candidate.ReWrite(si);
                }
            }

            // Everything pending has been handled; clear the change markers.
            undeleteAll      = false;
            normsDirty       = false;
            deletedDocsDirty = false;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads the deletions bit vector for the current segment when the segment
        /// has deletions; does nothing otherwise.
        /// </summary>
        /// <exception cref="CorruptIndexException">
        /// thrown when the bit vector marks more documents as deleted than
        /// <c>MaxDoc()</c> reports for this segment
        /// </exception>
        private void LoadDeletedDocs()
        {
            if (!HasDeletions(si))
            {
                return;
            }

            // NOTE: the bitvector is read from the regular directory, not the cfs
            deletedDocs = new BitVector(Directory(), si.GetDelFileName());

            // The number of deletes must never exceed maxDoc for this segment.
            bool tooManyDeletes = deletedDocs.Count() > MaxDoc();
            if (tooManyDeletes)
            {
                System.String problem = "number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name;
                throw new CorruptIndexException(problem);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Loads the deletions bit vector for the current segment when the segment
        /// has deletions. The consistency checks are <c>Debug.Assert</c> calls, so
        /// they only run in builds where the DEBUG symbol is defined.
        /// </summary>
        private void LoadDeletedDocs()
        {
            // NOTE: the bitvector is stored using the regular directory, not cfs
            if (HasDeletions(si))
            {
                deletedDocs = new BitVector(Directory(), si.GetDelFileName());

                // The delete count recorded in the segment info must match the bit vector.
                System.Diagnostics.Debug.Assert(si.GetDelCount() == deletedDocs.Count(),
                                                "delete count mismatch: info=" + si.GetDelCount() + " vs BitVector=" + deletedDocs.Count());

                // Verify # deletes does not exceed maxDoc for this segment.
                // The message reports the same value the condition tests.
                System.Diagnostics.Debug.Assert(si.GetDelCount() <= MaxDoc(),
                                                "number of deletes (" + si.GetDelCount() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
            }
            else
            {
                // No deletions file, so the segment info must not claim any deletes.
                System.Diagnostics.Debug.Assert(si.GetDelCount() == 0,
                                                "delete count mismatch: info=" + si.GetDelCount() + " but segment " + si.name + " has no deletions");
            }
        }
Exemplo n.º 4
0
		/// <summary>
		/// Opens the readers and input streams of the given segment and stores them
		/// in this reader's fields. If any step throws, <c>DoClose()</c> is called
		/// before the exception propagates.
		/// </summary>
		/// <param name="si">the segment to open</param>
		private void Initialize(SegmentInfo si)
		{
			this.si = si;
			segment = si.name;

			bool opened = false;
			try
			{
				// Files are read from the compound file when the segment uses one,
				// otherwise from the regular directory.
				Directory dataDir = Directory();
				if (si.GetUseCompoundFile())
				{
					cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
					dataDir = cfsReader;
				}

				fieldInfos = new FieldInfos(dataDir, segment + ".fnm");
				fieldsReader = new FieldsReader(dataDir, segment, fieldInfos);

				// The two sources of "maxDoc" have to agree.
				if (fieldsReader.Size() != si.docCount)
				{
					System.String mismatch = "doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount;
					throw new System.SystemException(mismatch);
				}

				tis = new TermInfosReader(dataDir, segment, fieldInfos);

				// NOTE: the bitvector is read from the regular directory, not the cfs
				if (HasDeletions(si))
				{
					deletedDocs = new BitVector(Directory(), si.GetDelFileName());

					// The number of deletes must not exceed maxDoc for this segment.
					bool tooManyDeletes = deletedDocs.Count() > MaxDoc();
					if (tooManyDeletes)
					{
						System.String problem = "number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name;
						throw new System.SystemException(problem);
					}
				}

				// Read or keep open every index file now, so that they remain
				// available even if a later index update removes them.
				freqStream = dataDir.OpenInput(segment + ".frq");
				proxStream = dataDir.OpenInput(segment + ".prx");
				OpenNorms(dataDir);

				// Term vector files are opened only when some field has vectors.
				if (fieldInfos.HasVectors())
				{
					termVectorsReaderOrig = new TermVectorsReader(dataDir, segment, fieldInfos);
				}

				opened = true;
			}
			finally
			{
				// With lock-less commits it is entirely possible (and fine) to hit
				// a FileNotFound exception above. Explicitly close whatever subset
				// was opened so we don't have to wait for a GC to do so.
				if (!opened)
				{
					DoClose();
				}
			}
		}
Exemplo n.º 5
0
		/// <summary>
		/// Returns a reader for <paramref name="si"/> that shares this reader's core.
		/// This instance itself is returned when norms and deletions are unchanged,
		/// no clone was requested, and both this reader and the requested one are
		/// read-only. Otherwise a new reader is created; unchanged deletions and
		/// norms are carried over, changed ones are loaded again.
		/// </summary>
		/// <param name="si">segment info the returned reader should reflect</param>
		/// <param name="doClone">true to force creation of a new reader instance</param>
		/// <param name="openReadOnly">true if the returned reader must be read-only</param>
		/// <returns>this reader, or a newly created reader</returns>
		internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
		{
			lock (this)
			{
				// Deletions are current when both infos agree on whether deletions
				// exist and, if they do, name the same deletions file.
				bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));

				// Record, per field, whether its norm file name differs.
				int numFields = core.fieldInfos.Size();
				bool[] fieldNormsChanged = new bool[numFields];
				bool normsUpToDate = true;
				for (int fieldNumber = 0; fieldNumber < numFields; fieldNumber++)
				{
					bool sameNormFile = this.si.GetNormFileName(fieldNumber).Equals(si.GetNormFileName(fieldNumber));
					if (sameNormFile)
					{
						continue;
					}
					fieldNormsChanged[fieldNumber] = true;
					normsUpToDate = false;
				}

				// If we're cloning we must run through the reopen logic below; also,
				// unless both old and new readers are read-only, we create a new
				// reader to avoid sharing modifications.
				bool nothingChanged = normsUpToDate && deletionsUpToDate;
				if (nothingChanged && !doClone && openReadOnly && readOnly)
				{
					return this;
				}

				// When cloning, the incoming SegmentInfos should not
				// have any changes in it:
				System.Diagnostics.Debug.Assert(!doClone || nothingChanged);

				// Instantiate the configured reader implementation.
				SegmentReader clone;
				try
				{
					if (!openReadOnly)
					{
						clone = (SegmentReader) System.Activator.CreateInstance(IMPL);
					}
					else
					{
						clone = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
					}
				}
				catch (System.Exception e)
				{
					throw new System.SystemException("cannot load SegmentReader class: " + e, e);
				}

				bool success = false;
				try
				{
					core.IncRef();
					clone.core = core;
					clone.readOnly = openReadOnly;
					clone.si = si;
					clone.readBufferSize = readBufferSize;

					if (!openReadOnly && hasChanges)
					{
						// My pending changes transfer to the new reader
						clone.pendingDeleteCount = pendingDeleteCount;
						clone.deletedDocsDirty = deletedDocsDirty;
						clone.normsDirty = normsDirty;
						clone.hasChanges = hasChanges;
						hasChanges = false;
					}

					if (!doClone && !deletionsUpToDate)
					{
						// Deletions changed: load them from the new segment info.
						System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
						clone.LoadDeletedDocs();
					}
					else if (deletedDocs != null)
					{
						// Cloning, or deletions unchanged: share the existing bit
						// vector and bump its reference count.
						deletedDocsRef.IncRef();
						clone.deletedDocs = deletedDocs;
						clone.deletedDocsRef = deletedDocsRef;
					}

					clone.SetDisableFakeNorms(GetDisableFakeNorms());
					clone.norms = new System.Collections.Hashtable();

					// Carry over norms: all of them when cloning, otherwise only
					// those whose norm file did not change.
					for (int fieldNumber = 0; fieldNumber < fieldNormsChanged.Length; fieldNumber++)
					{
						if (!doClone && fieldNormsChanged[fieldNumber])
						{
							continue;
						}

						System.String fieldName = core.fieldInfos.FieldInfo(fieldNumber).name;
						Norm existing = (Norm) this.norms[fieldName];
						if (existing != null)
						{
							clone.norms[fieldName] = existing.Clone();
						}
					}

					// If we are not cloning, then this will open anew
					// any norms that have changed:
					clone.OpenNorms(si.GetUseCompoundFile()?core.GetCFSReader():Directory(), readBufferSize);

					success = true;
				}
				finally
				{
					if (!success)
					{
						// An exception occurred during reopen: release what the
						// new reader already acquired.
						clone.DecRef();
					}
				}

				return clone;
			}
		}
Exemplo n.º 6
0
        /// <summary>Returns a <see cref="Status"/> instance detailing
        /// the state of the index.
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check;
        /// when null, every segment is checked
        /// </param>
        /// <returns>the populated status; when the segments file cannot be read or
        /// opened, its version cannot be read, or the index format is newer than
        /// this tool, the status is returned early with the matching flag set
        /// </returns>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            // Map the format number to a readable description. This must be a
            // single else-if chain: otherwise a SegmentInfos.FORMAT index falls
            // through to the trailing branches and its description is overwritten.
            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else if (format == SegmentInfos.FORMAT_CHECKSUM)
            {
                sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_DEL_COUNT)
            {
                sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_HAS_PROX)
            {
                sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_USER_DATA)
            {
                sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
            }
            else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
            {
                sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
            }
            else if (format < SegmentInfos.CURRENT_FORMAT)
            {
                // Unknown format below CURRENT_FORMAT: written by a newer Lucene.
                sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                skip    = true;
            }
            else
            {
                sFormat = format + " [Lucene 1.3 or prior]";
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                // Echo each requested segment name and record it in the status.
                foreach (System.Object segmentName in onlySegments)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + segmentName);
                    }
                    result.segmentsChecked.Add(segmentName);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            // newSegments starts empty and collects every segment that passes.
            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                // Number of documents counted as lost if this segment fails; narrowed
                // to the live doc count once the reader has been opened.
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    // Any failure marks the segment as bad; it is not added to newSegments.
                    Msg("FAILED");
                    System.String comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Returns a reader reflecting <paramref name="si"/>. When neither the
        /// deletions file nor any norm file name differs from this reader's segment
        /// info, this instance is returned. Otherwise a new reader is created that
        /// shares this reader's already opened resources, opens its own
        /// FieldsReader, and reloads only the deletions and norms that changed.
        /// </summary>
        /// <param name="si">segment info the returned reader should reflect</param>
        /// <returns>this reader if nothing changed, otherwise a new reader</returns>
        internal virtual SegmentReader ReopenSegment(SegmentInfo si)
        {
            lock (this)
            {
                // Deletions are current when both infos agree on whether deletions
                // exist and, if they do, name the same deletions file.
                bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
                bool normsUpToDate     = true;

                // Record, per field, whether its norm file name differs.
                bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
                    {
                        normsUpToDate        = false;
                        fieldNormsChanged[i] = true;
                    }
                }

                if (normsUpToDate && deletionsUpToDate)
                {
                    return(this);
                }


                // clone reader
                SegmentReader clone;
                if (readOnly)
                {
                    clone = new ReadOnlySegmentReader();
                }
                else
                {
                    clone = new SegmentReader();
                }

                bool success = false;
                try
                {
                    clone.readOnly       = readOnly;
                    clone.directory      = directory;
                    clone.si             = si;
                    clone.segment        = segment;
                    clone.readBufferSize = readBufferSize;
                    clone.cfsReader      = cfsReader;
                    clone.storeCFSReader = storeCFSReader;

                    clone.fieldInfos            = fieldInfos;
                    clone.tis                   = tis;
                    clone.freqStream            = freqStream;
                    clone.proxStream            = proxStream;
                    clone.termVectorsReaderOrig = termVectorsReaderOrig;


                    // we have to open a new FieldsReader, because it is not thread-safe
                    // and can thus not be shared among multiple SegmentReaders
                    // TODO: Change this in case FieldsReader becomes thread-safe in the future
                    System.String fieldsSegment;

                    Directory storeDir = Directory();

                    if (si.GetDocStoreOffset() != -1)
                    {
                        fieldsSegment = si.GetDocStoreSegment();
                        if (storeCFSReader != null)
                        {
                            storeDir = storeCFSReader;
                        }
                    }
                    else
                    {
                        fieldsSegment = segment;
                        if (cfsReader != null)
                        {
                            storeDir = cfsReader;
                        }
                    }

                    if (fieldsReader != null)
                    {
                        clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                    }


                    if (!deletionsUpToDate)
                    {
                        // load deleted docs
                        clone.deletedDocs = null;
                        clone.LoadDeletedDocs();
                    }
                    else
                    {
                        clone.deletedDocs = this.deletedDocs;
                    }

                    clone.norms = new System.Collections.Hashtable();
                    if (!normsUpToDate)
                    {
                        // load norms
                        for (int i = 0; i < fieldNormsChanged.Length; i++)
                        {
                            // copy unchanged norms to the cloned reader and incRef those norms
                            if (!fieldNormsChanged[i])
                            {
                                System.String curField = fieldInfos.FieldInfo(i).name;
                                Norm          norm     = (Norm)this.norms[curField];
                                // A field may have no entry in the norms table;
                                // skip it instead of dereferencing null.
                                if (norm != null)
                                {
                                    norm.IncRef();
                                    clone.norms[curField] = norm;
                                }
                            }
                        }

                        clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
                    }
                    else
                    {
                        // No norm file changed: share every loaded norm.
                        System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                        while (it.MoveNext())
                        {
                            System.String field = (System.String)it.Current;
                            Norm          norm  = (Norm)norms[field];
                            norm.IncRef();
                            clone.norms[field] = norm;
                        }
                    }

                    if (clone.singleNormStream == null)
                    {
                        // Open the single norms file for the first indexed field with
                        // norms that has no separate norms file and whose norm file
                        // name ends with the norms extension.
                        for (int i = 0; i < fieldInfos.Size(); i++)
                        {
                            FieldInfo fi = fieldInfos.FieldInfo(i);
                            if (fi.isIndexed && !fi.omitNorms)
                            {
                                Directory     d        = si.GetUseCompoundFile() ? cfsReader : Directory();
                                System.String fileName = si.GetNormFileName(fi.number);
                                if (si.HasSeparateNorms(fi.number))
                                {
                                    continue;
                                }

                                if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
                                {
                                    clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                                    break;
                                }
                            }
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (this.referencedSegmentReader != null)
                    {
                        // this reader shares resources with another SegmentReader,
                        // so we increment the other readers refCount. We don't
                        // increment the refCount of the norms because we did
                        // that already for the shared norms
                        clone.referencedSegmentReader = this.referencedSegmentReader;
                        referencedSegmentReader.IncRefReaderNotNorms();
                    }
                    else
                    {
                        // this reader wasn't reopened, so we increment this
                        // readers refCount
                        clone.referencedSegmentReader = this;
                        IncRefReaderNotNorms();
                    }

                    if (!success)
                    {
                        // An exception occurred during reopen, we have to decRef the norms
                        // that we incRef'ed already and close singleNormsStream and FieldsReader
                        clone.DecRef();
                    }
                }

                return(clone);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Checks every segment referenced by the current segments file in
        /// <paramref name="dir"/> and writes a progress report to <c>out_Renamed</c>.
        /// For each segment the reader is opened and its norms, postings
        /// (terms/freq/prox), stored fields and term vectors are walked; any
        /// exception raised while doing so marks the segment as broken.
        /// </summary>
        /// <param name="dir">Directory holding the index to check.</param>
        /// <param name="doFix">
        /// When true and at least one segment is broken, a new segments file that
        /// no longer references the broken segments is written after a 5 step
        /// countdown.  When false the index is only inspected.
        /// </param>
        /// <returns>
        /// true only if no problem was detected; false if the segments file could
        /// not be read, the format is newer than this tool understands, or any
        /// segment was broken (even when the fix was written successfully).
        /// </returns>
        public static bool Check(Directory dir, bool doFix)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();

            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not open segments file in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
                out_Renamed.Write(t.StackTrace);
                out_Renamed.Flush();
                return(false);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            // Map the format marker to a readable label.  This must be a single
            // if / else-if chain: with a stand-alone first "if" the pre-2.1 label
            // was always overwritten by the final "else" branch below.
            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                // Values below FORMAT_SHARED_DOC_STORE are treated as formats this
                // tool does not know yet.
                sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                skip    = true;
            }
            else
            {
                sFormat = format + " [Lucene 1.3 or prior]";
            }

            out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);

            if (skip)
            {
                out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                return(false);
            }

            // newSIS collects only the segments that pass every test; it is what
            // gets written back when doFix is set.
            SegmentInfos newSIS = (SegmentInfos)sis.Clone();

            newSIS.Clear();
            bool changed         = false;
            int  totLoseDocCount = 0;
            int  numBadSegments  = 0;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                out_Renamed.WriteLine("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);

                // Number of documents that would be lost if this segment is dropped;
                // refined to the live doc count once the reader could be opened.
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    out_Renamed.WriteLine("    compound=" + info.GetUseCompoundFile());
                    out_Renamed.WriteLine("    numFiles=" + info.Files().Count);
                    out_Renamed.WriteLine(String.Format(nf, "    size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        out_Renamed.WriteLine("    docStoreOffset=" + docStoreOffset);
                        out_Renamed.WriteLine("    docStoreSegment=" + info.GetDocStoreSegment());
                        out_Renamed.WriteLine("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        out_Renamed.WriteLine("    no deletions");
                    }
                    else
                    {
                        out_Renamed.WriteLine("    has deletions [delFileName=" + delFileName + "]");
                    }
                    out_Renamed.Write("    test: open reader.........");
                    reader = SegmentReader.Get(info);
                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
                    }
                    else
                    {
                        out_Renamed.WriteLine("OK");
                    }

                    // Every field's norms array must have one entry per document.
                    out_Renamed.Write("    test: fields, norms.......");
                    System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    System.Collections.IEnumerator it         = fieldNames.Keys.GetEnumerator();
                    while (it.MoveNext())
                    {
                        System.String fieldName = (System.String)it.Current;
                        byte[]        b         = reader.Norms(fieldName);
                        if (b.Length != info.docCount)
                        {
                            throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                        }
                    }
                    out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");

                    out_Renamed.Write("    test: terms, freq, prox...");
                    TermEnum      termEnum      = reader.Terms();
                    TermPositions termPositions = reader.TermPositions();

                    // Used only to count up # deleted docs for this
                    // term
                    MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                    long termCount = 0;
                    long totFreq   = 0;
                    long totPos    = 0;
                    while (termEnum.Next())
                    {
                        termCount++;
                        Term term    = termEnum.Term();
                        int  docFreq = termEnum.DocFreq();
                        termPositions.Seek(term);
                        int lastDoc = -1;
                        int freq0   = 0;   // number of docs actually enumerated for this term
                        totFreq += docFreq;
                        while (termPositions.Next())
                        {
                            freq0++;
                            int doc  = termPositions.Doc();
                            int freq = termPositions.Freq();
                            if (doc <= lastDoc)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
                            }
                            lastDoc = doc;
                            if (freq <= 0)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                            }

                            int lastPos = -1;
                            totPos += freq;
                            for (int j = 0; j < freq; j++)
                            {
                                int pos = termPositions.NextPosition();
                                if (pos < 0)
                                {
                                    throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                                }
                                // Positions must not go backwards within a document.
                                // lastPos is advanced on every iteration; previously it
                                // stayed at -1, which made this check unreachable.  The
                                // comparison is strict so that it matches the message.
                                if (pos < lastPos)
                                {
                                    throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                                }
                                lastPos = pos;
                            }
                        }

                        // Now count how many deleted docs occurred in
                        // this term:
                        int delCount;
                        if (reader.HasDeletions())
                        {
                            myTermDocs.Seek(term);
                            while (myTermDocs.Next())
                            {
                            }
                            delCount = myTermDocs.delCount;
                        }
                        else
                        {
                            delCount = 0;
                        }

                        if (freq0 + delCount != docFreq)
                        {
                            throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                        }
                    }

                    out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");

                    // Load every non-deleted document and compare the count with
                    // what the reader reports.
                    out_Renamed.Write("    test: stored fields.......");
                    int  docCount  = 0;
                    long totFields = 0;
                    for (int j = 0; j < info.docCount; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            docCount++;
                            Document doc = reader.Document(j);
                            totFields += doc.GetFields().Count;
                        }
                    }

                    if (docCount != reader.NumDocs())
                    {
                        // Report the expected value (reader.NumDocs()) next to the
                        // observed one; the message used to print docCount twice.
                        throw new System.SystemException("docCount=" + reader.NumDocs() + " but saw " + docCount + " undeleted docs");
                    }

                    out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) }));

                    out_Renamed.Write("    test: term vectors........");
                    int totVectors = 0;
                    for (int j = 0; j < info.docCount; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                            if (tfv != null)
                            {
                                totVectors += tfv.Length;
                            }
                        }
                    }

                    out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) }));
                    out_Renamed.WriteLine("");
                }
                catch (System.Exception t)
                {
                    // Any failure above means the segment is broken: report it and
                    // leave it out of newSIS.
                    out_Renamed.WriteLine("FAILED");
                    System.String comment;
                    if (doFix)
                    {
                        comment = "will remove reference to this segment (-fix is specified)";
                    }
                    else
                    {
                        comment = "would remove reference to this segment (-fix was not specified)";
                    }
                    out_Renamed.WriteLine("    WARNING: " + comment + "; full exception:");
                    out_Renamed.Write(t.StackTrace);
                    out_Renamed.Flush();
                    out_Renamed.WriteLine("");
                    totLoseDocCount += toLoseDocCount;
                    numBadSegments++;
                    changed = true;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                newSIS.Add(info.Clone());
            }

            if (!changed)
            {
                out_Renamed.WriteLine("No problems were detected with this index.\n");
                return(true);
            }
            else
            {
                out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
                if (doFix)
                {
                    out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
                }
                else
                {
                    out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
                }
                out_Renamed.WriteLine();
            }

            if (doFix)
            {
                out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
                for (int i = 0; i < 5; i++)
                {
                    try
                    {
                        // 10000 * 1000 ticks of 100ns each = one second per step.
                        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
                    }
                    catch (System.Threading.ThreadInterruptedException)
                    {
                        // NOTE(review): this retries the same step after re-interrupting
                        // the current thread.  If Interrupt() makes the next Sleep throw
                        // again immediately, this loop never finishes - confirm.
                        SupportClass.ThreadClass.Current().Interrupt();
                        i--;
                        continue;
                    }

                    out_Renamed.WriteLine("  " + (5 - i) + "...");
                }
                out_Renamed.Write("Writing...");
                try
                {
                    newSIS.Write(dir);
                }
                catch (System.Exception t)
                {
                    out_Renamed.WriteLine("FAILED; exiting");
                    out_Renamed.Write(t.StackTrace);
                    out_Renamed.Flush();
                    return(false);
                }
                out_Renamed.WriteLine("OK");
                out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
            }
            else
            {
                out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
            }
            out_Renamed.WriteLine("");

            // The index was not clean when this method was entered.
            return(false);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Opens everything this reader needs for the segment described by
        /// <paramref name="si"/>: field infos, stored fields, the term dictionary,
        /// deletions (if any), the freq/prox streams, norms and - when the segment
        /// has them - term vectors.  If any step throws, <c>DoClose()</c> is called
        /// before the exception propagates.
        /// </summary>
        /// <param name="si">Segment to open.</param>
        private void  Initialize(SegmentInfo si)
        {
            this.si = si;
            segment = si.name;

            bool opened = false;

            try
            {
                // Start from the plain directory; a compound segment redirects
                // most file access to its .cfs reader instead.
                Directory segmentDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader  = new CompoundFileReader(Directory(), segment + ".cfs");
                    segmentDir = cfsReader;
                }

                fieldInfos   = new FieldInfos(segmentDir, segment + ".fnm");
                fieldsReader = new FieldsReader(segmentDir, segment, fieldInfos);

                // The stored-fields reader and the segment info must report the
                // same number of documents.
                if (fieldsReader.Size() != si.docCount)
                {
                    throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                }

                tis = new TermInfosReader(segmentDir, segment, fieldInfos);

                // Deletions are always read from the regular directory, never
                // from the compound file.
                if (HasDeletions(si))
                {
                    deletedDocs = new BitVector(Directory(), si.GetDelFileName());

                    // There cannot be more deleted docs than docs in the segment.
                    if (deletedDocs.Count() > MaxDoc())
                    {
                        throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
                    }
                }

                // Open the remaining index files now so they stay available even
                // if a later index update removes them.
                freqStream = segmentDir.OpenInput(segment + ".frq");
                proxStream = segmentDir.OpenInput(segment + ".prx");
                OpenNorms(segmentDir);

                // Term vector files are only opened for segments that have them.
                if (fieldInfos.HasVectors())
                {
                    termVectorsReaderOrig = new TermVectorsReader(segmentDir, segment, fieldInfos);
                }

                opened = true;
            }
            finally
            {
                // Lock-less commits make a FileNotFound failure above a normal
                // event.  Release whatever was opened right away instead of
                // leaving it to the GC.
                if (!opened)
                {
                    DoClose();
                }
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Returns a reader for the segment state described by <paramref name="si"/>.
        /// If neither the deletions file nor any norms file name differs from the
        /// state this reader was opened on, this reader itself is returned.
        /// Otherwise a new reader is built that shares the unchanged resources
        /// with this one and reloads only deletions and/or the changed norms.
        /// </summary>
        /// <param name="si">The up-to-date segment info.</param>
        /// <returns>This reader, or a new reader sharing resources with it.</returns>
        internal virtual SegmentReader ReopenSegment(SegmentInfo si)
        {
            lock (this)
            {
                // Deletions are current when both infos agree on having deletions
                // and, if there are any, on the deletions file name.
                bool sameDeletionState = this.si.HasDeletions() == si.HasDeletions();
                bool deletionsUpToDate = sameDeletionState && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));

                // Flag each field whose norms file name changed.
                bool   normsUpToDate     = true;
                bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
                for (int fieldNum = 0; fieldNum < fieldInfos.Size(); fieldNum++)
                {
                    System.String currentNormFile = this.si.GetNormFileName(fieldNum);
                    if (!currentNormFile.Equals(si.GetNormFileName(fieldNum)))
                    {
                        normsUpToDate = false;
                        fieldNormsChanged[fieldNum] = true;
                    }
                }

                // Nothing changed: no new reader is needed.
                if (normsUpToDate && deletionsUpToDate)
                {
                    return this;
                }

                // The new reader keeps the read-only flavour of this one.
                SegmentReader clone = readOnly ? new ReadOnlySegmentReader() : new SegmentReader();

                bool success = false;
                try
                {
                    // Plain settings.
                    clone.readOnly       = readOnly;
                    clone.directory      = directory;
                    clone.si             = si;
                    clone.segment        = segment;
                    clone.readBufferSize = readBufferSize;
                    clone.cfsReader      = cfsReader;
                    clone.storeCFSReader = storeCFSReader;

                    // Resources shared with this reader.
                    clone.fieldInfos            = fieldInfos;
                    clone.tis                   = tis;
                    clone.freqStream            = freqStream;
                    clone.proxStream            = proxStream;
                    clone.termVectorsReaderOrig = termVectorsReaderOrig;

                    // FieldsReader is not thread-safe and therefore cannot be
                    // shared between SegmentReaders; the new reader gets its own.
                    // TODO: Change this in case FieldsReader becomes thread-safe in the future
                    Directory     storeDir       = Directory();
                    bool          usesDocStore   = si.GetDocStoreOffset() != - 1;
                    System.String fieldsSegment  = usesDocStore ? si.GetDocStoreSegment() : segment;
                    Directory     compoundSource = usesDocStore ? (Directory) storeCFSReader : (Directory) cfsReader;
                    if (compoundSource != null)
                    {
                        storeDir = compoundSource;
                    }

                    if (fieldsReader != null)
                    {
                        clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                    }

                    if (deletionsUpToDate)
                    {
                        clone.deletedDocs = this.deletedDocs;
                    }
                    else
                    {
                        // Reload the deleted docs from scratch.
                        clone.deletedDocs = null;
                        clone.LoadDeletedDocs();
                    }

                    clone.norms = new System.Collections.Hashtable();
                    if (normsUpToDate)
                    {
                        // All norms are unchanged: share every one of them,
                        // taking a reference on each.
                        foreach (System.String field in norms.Keys)
                        {
                            Norm shared = (Norm) norms[field];
                            shared.IncRef();
                            clone.norms[field] = shared;
                        }
                    }
                    else
                    {
                        // Share (and reference) only the norms that did not
                        // change, then let the new reader open the others.
                        for (int fieldNum = 0; fieldNum < fieldNormsChanged.Length; fieldNum++)
                        {
                            if (fieldNormsChanged[fieldNum])
                            {
                                continue;
                            }

                            System.String unchangedField = fieldInfos.FieldInfo(fieldNum).name;
                            Norm shared = (Norm) this.norms[unchangedField];
                            shared.IncRef();
                            clone.norms[unchangedField] = shared;
                        }

                        clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
                    }

                    if (clone.singleNormStream == null)
                    {
                        // Open the single norms file for the first indexed field
                        // with norms that has no separate norms file and whose
                        // norms file carries the norms extension.
                        for (int fieldNum = 0; fieldNum < fieldInfos.Size(); fieldNum++)
                        {
                            FieldInfo fi = fieldInfos.FieldInfo(fieldNum);
                            if (!fi.isIndexed || fi.omitNorms)
                            {
                                continue;
                            }

                            Directory     normsDir = si.GetUseCompoundFile() ? cfsReader : Directory();
                            System.String normFile = si.GetNormFileName(fi.number);
                            if (si.HasSeparateNorms(fi.number))
                            {
                                continue;
                            }

                            if (normFile.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
                            {
                                clone.singleNormStream = normsDir.OpenInput(normFile, readBufferSize);
                                break;
                            }
                        }
                    }

                    success = true;
                }
                finally
                {
                    // The reader whose resources are shared gets one more
                    // reference: the reader this one already refers to, or this
                    // reader itself if it was not reopened.  Norms are not
                    // touched here because the shared norms were referenced above.
                    SegmentReader owner = (this.referencedSegmentReader != null) ? this.referencedSegmentReader : this;
                    clone.referencedSegmentReader = owner;
                    owner.IncRefReaderNotNorms();

                    if (!success)
                    {
                        // The reopen failed: drop the references taken so far and
                        // close singleNormsStream and FieldsReader.
                        clone.DecRef();
                    }
                }

                return clone;
            }
        }
Exemplo n.º 11
0
        /// <summary>Determine index files that are no longer referenced
        /// and therefore should be deleted.  This is called once
        /// (by the writer), and then subsequently we add onto
        /// deletable any files that are no longer needed at the
        /// point that we create the unused file (eg when merging
        /// segments), and we only remove from deletable when a
        /// file is successfully deleted.
        /// </summary>
        /// <remarks>
        /// Every file in <c>directory</c> that the index file name filter accepts
        /// (except the current segments file and the segments.gen file) is split
        /// into a segment name and an extension.  The file is passed to
        /// <c>AddDeletableFile</c> when its segment is not in <c>segmentInfos</c>,
        /// or when the segment is current but the file is a stale .del file, a
        /// stale separate norms file, a left-over .cfs file, or a single file
        /// that is already contained in the segment's compound file.
        /// </remarks>
        public void  FindDeletableFiles()
        {
            // Gather all "current" segments:
            System.Collections.Hashtable current = new System.Collections.Hashtable();
            for (int j = 0; j < segmentInfos.Count; j++)
            {
                SegmentInfo segmentInfo = (SegmentInfo)segmentInfos[j];
                current[segmentInfo.name] = segmentInfo;
            }

            // Then go through all files in the Directory that are
            // Lucene index files, and add to deletable if they are
            // not referenced by the current segments info:

            System.String       segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName();
            IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();

            // Separate norms files have the extension "s<digits>".  The pattern is
            // anchored so the WHOLE extension has to match (Regex.Match alone
            // accepts a match anywhere in the input), and restricted to ASCII
            // digits so that Int32.Parse below cannot fail on what it accepted.
            // Built once here instead of once per file.
            Pattern separateNormsExtension = new System.Text.RegularExpressions.Regex("^s[0-9]+$");

            System.String[] files = directory.List();

            for (int i = 0; i < files.Length; i++)
            {
                if (filter.Accept(null, files[i]) && !files[i].Equals(segmentsInfosFileName) && !files[i].Equals(IndexFileNames.SEGMENTS_GEN))
                {
                    System.String segmentName;
                    System.String extension;

                    // First remove any extension:
                    int loc = files[i].IndexOf((System.Char) '.');
                    if (loc != -1)
                    {
                        extension   = files[i].Substring(1 + loc);
                        segmentName = files[i].Substring(0, (loc) - (0));
                    }
                    else
                    {
                        extension   = null;
                        segmentName = files[i];
                    }

                    // Then, remove any generation count.  The search starts at
                    // index 1 to skip the leading character of the name;
                    // String.IndexOf throws when the start index is past the end
                    // of the string, so very short names are handled explicitly.
                    loc = (segmentName.Length > 1) ? segmentName.IndexOf((System.Char) '_', 1) : -1;
                    if (loc != -1)
                    {
                        segmentName = segmentName.Substring(0, (loc) - (0));
                    }

                    // Delete this file if it's not a "current" segment,
                    // or, it is a single index file but there is now a
                    // corresponding compound file:
                    bool doDelete = false;

                    if (!current.ContainsKey(segmentName))
                    {
                        // Delete if segment is not referenced:
                        doDelete = true;
                    }
                    else
                    {
                        // OK, segment is referenced, but file may still
                        // be orphan'd:
                        SegmentInfo info = (SegmentInfo)current[segmentName];

                        if (filter.IsCFSFile(files[i]) && info.GetUseCompoundFile())
                        {
                            // This file is in fact stored in a CFS file for
                            // this segment:
                            doDelete = true;
                        }
                        else
                        {
                            if ("del".Equals(extension))
                            {
                                // This is a _segmentName_N.del file:
                                if (!files[i].Equals(info.GetDelFileName()))
                                {
                                    // If this is a separate .del file, but it
                                    // doesn't match the current del filename for
                                    // this segment, then delete it:
                                    doDelete = true;
                                }
                            }
                            else if (extension != null && extension.StartsWith("s") && separateNormsExtension.Match(extension).Success)
                            {
                                // The digits after the leading 's' are the field number.
                                int field = System.Int32.Parse(extension.Substring(1));
                                // This is a _segmentName_N.sX file:
                                if (!files[i].Equals(info.GetNormFileName(field)))
                                {
                                    // This is an orphan'd separate norms file:
                                    doDelete = true;
                                }
                            }
                            else if ("cfs".Equals(extension) && !info.GetUseCompoundFile())
                            {
                                // This is a partially written
                                // _segmentName.cfs:
                                doDelete = true;
                            }
                        }
                    }

                    if (doDelete)
                    {
                        AddDeletableFile(files[i]);
                        if (infoStream != null)
                        {
                            infoStream.WriteLine("IndexFileDeleter: file \"" + files[i] + "\" is unreferenced in index and will be deleted on next commit");
                        }
                    }
                }
            }
        }