Example #1
0
		/// <summary>Tells whether a single segment already counts as optimized.
		/// All of the following must hold: the writer reports no deleted docs
		/// for it, it has no separate norms, it lives in the writer's
		/// directory, and either its compound-file flag equals the policy's
		/// <c>useCompoundFile</c> setting or <c>noCFSRatio</c> is below 1.0.
		/// </summary>
		/// <param name="info">the segment to inspect</param>
		/// <returns>true when every condition above is met</returns>
		private bool IsOptimized(SegmentInfo info)
		{
			// Checks run in the same order as a short-circuited conjunction.
			if (writer.NumDeletedDocs(info) > 0)
			{
				return false;
			}
			if (info.HasSeparateNorms())
			{
				return false;
			}
			if (!(info.dir == writer.GetDirectory()))
			{
				return false;
			}
			if (info.GetUseCompoundFile() == useCompoundFile)
			{
				return true;
			}
			return noCFSRatio < 1.0;
		}
Example #2
0
		/// <summary>Reports this policy's compound-file setting. Both arguments
		/// are ignored; the stored <c>useCompoundFile</c> flag is returned
		/// unchanged.
		/// </summary>
		public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
		{
			bool configured = useCompoundFile;
			return configured;
		}
Example #3
0
		/// <summary>Size of a segment measured in documents. When
		/// <c>calibrateSizeByDeletes</c> is set, the number of deleted docs
		/// reported by the writer is subtracted from the segment's doc count.
		/// </summary>
		/// <param name="info">the segment to measure</param>
		/// <returns>the (optionally delete-adjusted) document count</returns>
		protected internal virtual long SizeDocs(SegmentInfo info)
		{
			if (!calibrateSizeByDeletes)
			{
				return info.docCount;
			}

			// Widen to long before subtracting, as the unadjusted path returns long too.
			long deleted = writer.NumDeletedDocs(info);
			return info.docCount - deleted;
		}
Example #4
0
		/// <summary> Test stored fields for a segment: loads every non-deleted
		/// document, counts documents and fields, and verifies that the number
		/// of documents seen equals <c>reader.NumDocs()</c>.
		/// </summary>
		/// <param name="info">segment whose <c>docCount</c> bounds the scan</param>
		/// <param name="reader">reader opened on that segment</param>
		/// <param name="format">number format used for the summary message</param>
		/// <returns>the collected status; any exception raised during the test
		/// is reported through <c>Msg</c> and stored in <c>status.error</c>
		/// rather than propagated</returns>
		private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
		{
			Status.StoredFieldStatus status = new Status.StoredFieldStatus();
			
			try
			{
				if (infoStream != null)
				{
					infoStream.Write("    test: stored fields.......");
				}
				
				// Scan stored fields for all documents
				for (int j = 0; j < info.docCount; ++j)
				{
					if (!reader.IsDeleted(j))
					{
						status.docCount++;
						Document doc = reader.Document(j);
						status.totFields += doc.GetFields().Count;
					}
				}
				
				// Validate docCount
				int expectedDocs = reader.NumDocs();
				if (status.docCount != expectedDocs)
				{
					// Report both sides of the failed comparison: the reader's
					// count and the number of undeleted docs actually visited.
					throw new System.SystemException("docCount=" + expectedDocs + " but saw " + status.docCount + " undeleted docs");
				}
				
				Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
			}
			catch (System.Exception e)
			{
				// Record the failure on the status object instead of rethrowing.
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}
			
			return status;
		}
Example #5
0
		/// <summary> Overwrites this instance's state with the state of
		/// <c>src</c>. Cached file information is cleared first, and the
		/// norm-generation array is duplicated so the two instances do not
		/// share it.
		/// </summary>
		/// <param name="src">the segment info to copy from</param>
		internal void  Reset(SegmentInfo src)
		{
			ClearFiles();

			this.name = src.name;
			this.docCount = src.docCount;
			this.dir = src.dir;
			this.preLockless = src.preLockless;
			this.delGen = src.delGen;
			this.docStoreOffset = src.docStoreOffset;
			this.docStoreIsCompoundFile = src.docStoreIsCompoundFile;

			// Take a private copy of the generations array (or keep null as null).
			this.normGen = (src.normGen == null) ? null : (long[]) src.normGen.Clone();

			this.isCompoundFile = src.isCompoundFile;
			this.hasSingleNormFile = src.hasSingleNormFile;
			this.delCount = src.delCount;
		}
Example #6
0
		/// <summary> Opens a non-read-only reader for <c>si</c> in the segment's
		/// own directory by delegating to the six-argument overload.
		/// </summary>
		internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
		{
			const bool readOnly = false;
			return Get(readOnly, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
		}
Example #7
0
		/// <summary> Returns a reader for <c>si</c> derived from this reader.
		/// When deletions and norms are unchanged, no clone was requested, and
		/// both this reader and the requested one are read-only, this instance
		/// itself is returned. Otherwise a new instance is created that shares
		/// this reader's core (with its reference count incremented), reuses or
		/// reloads deleted docs, clones the norms that are still valid, and
		/// opens the rest.
		/// </summary>
		/// <param name="si">segment info the returned reader should reflect</param>
		/// <param name="doClone">when true a new instance is always created and
		/// the current deleted docs and all norms are carried over</param>
		/// <param name="openReadOnly">selects the read-only implementation type
		/// for the new instance</param>
		/// <returns>this reader, or the newly created reader</returns>
		internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
		{
			lock (this)
			{
				// Deletions are current when both infos agree on having deletions and,
				// if there are any, name the same deletions file.
				bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
				bool normsUpToDate = true;
				
				// Per field: does the new info point at a different norms file?
				bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
				int fieldCount = core.fieldInfos.Size();
				for (int i = 0; i < fieldCount; i++)
				{
					if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
					{
						normsUpToDate = false;
						fieldNormsChanged[i] = true;
					}
				}
				
				// if we're cloning we need to run through the reopenSegment logic
				// also if both old and new readers aren't readonly, we clone to avoid sharing modifications
				if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly)
				{
					return this;
				}
				
				// When cloning, the incoming SegmentInfos should not
				// have any changes in it:
				System.Diagnostics.Debug.Assert(!doClone ||(normsUpToDate && deletionsUpToDate));
				
				// clone reader
				SegmentReader clone;
				try
				{
					if (openReadOnly)
						clone = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
					else
						clone = (SegmentReader) System.Activator.CreateInstance(IMPL);
				}
				catch (System.Exception e)
				{
					throw new System.SystemException("cannot load SegmentReader class: " + e, e);
				}
				
				bool success = false;
				try
				{
					// The new reader shares this reader's core, so take a reference on it.
					core.IncRef();
					clone.core = core;
					clone.readOnly = openReadOnly;
					clone.si = si;
					clone.readBufferSize = readBufferSize;
					
					if (!openReadOnly && hasChanges)
					{
						// My pending changes transfer to the new reader
						clone.pendingDeleteCount = pendingDeleteCount;
						clone.deletedDocsDirty = deletedDocsDirty;
						clone.normsDirty = normsDirty;
						clone.hasChanges = hasChanges;
						hasChanges = false;
					}
					
					if (doClone)
					{
						// Cloning: share the current deleted docs and bump their ref count.
						if (deletedDocs != null)
						{
							deletedDocsRef.IncRef();
							clone.deletedDocs = deletedDocs;
							clone.deletedDocsRef = deletedDocsRef;
						}
					}
					else
					{
						if (!deletionsUpToDate)
						{
							// load deleted docs
							System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
							clone.LoadDeletedDocs();
						}
						else if (deletedDocs != null)
						{
							// Deletions unchanged: share them, as in the clone case.
							deletedDocsRef.IncRef();
							clone.deletedDocs = deletedDocs;
							clone.deletedDocsRef = deletedDocsRef;
						}
					}
					
					clone.SetDisableFakeNorms(GetDisableFakeNorms());
					clone.norms = new System.Collections.Hashtable();
					
					// Clone norms
					for (int i = 0; i < fieldNormsChanged.Length; i++)
					{
						
						// Clone unchanged norms to the cloned reader
						if (doClone || !fieldNormsChanged[i])
						{
							System.String curField = core.fieldInfos.FieldInfo(i).name;
							Norm norm = (Norm) this.norms[curField];
							if (norm != null)
								clone.norms[curField] = norm.Clone();
						}
					}
					
					// If we are not cloning, then this will open anew
					// any norms that have changed:
					clone.OpenNorms(si.GetUseCompoundFile()?core.GetCFSReader():Directory(), readBufferSize);
					
					success = true;
				}
				finally
				{
					if (!success)
					{
						// An exception occured during reopen, we have to decRef the norms
						// that we incRef'ed already and close singleNormsStream and FieldsReader
						clone.DecRef();
					}
				}
				
				return clone;
			}
		}
Example #8
0
            /// <summary> Opens the per-segment resources held by this object:
            /// the compound file reader (when the segment uses one), field infos,
            /// the term infos reader, the freq stream, and the prox stream when
            /// the field infos report prox data. If any step throws,
            /// <c>DecRef()</c> is called before the exception propagates.
            /// </summary>
            /// <param name="origInstance">stored in the <c>origInstance</c> field
            /// once everything else has succeeded</param>
            /// <param name="dir">directory the segment is read from</param>
            /// <param name="si">segment to open; supplies the name and the
            /// compound-file flag</param>
            /// <param name="readBufferSize">buffer size passed to every reader
            /// and input opened here</param>
            /// <param name="termsIndexDivisor">passed to the term infos reader;
            /// the value -1 stores that reader in <c>tisNoIndex</c> instead of
            /// <c>tis</c></param>
            internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
			{
				segment = si.name;
				this.readBufferSize = readBufferSize;
				this.dir = dir;
				
				bool success = false;
				
				try
				{
					// Read either straight from dir or, for a compound segment,
					// through a CompoundFileReader layered on top of it.
					Directory dir0 = dir;
					if (si.GetUseCompoundFile())
					{
						cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
						dir0 = cfsReader;
					}
					cfsDir = dir0;
					
					fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
					
					this.termsIndexDivisor = termsIndexDivisor;
					TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
					// A divisor of -1 keeps the reader in tisNoIndex and leaves tis unassigned here.
					if (termsIndexDivisor == - 1)
					{
						tisNoIndex = reader;
					}
					else
					{
						tis = reader;
						tisNoIndex = null;
					}
					
					// make sure that all index files have been read or are kept open
					// so that if an index update removes them we'll still have them
					freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
					
					if (fieldInfos.HasProx())
					{
						proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
					}
					else
					{
						proxStream = null;
					}
					success = true;
				}
				finally
				{
					// On failure, drop the reference so anything opened so far is released.
					if (!success)
					{
						DecRef();
					}
				}


                // Must assign this at the end -- if we hit an
                // exception above core, we don't want to attempt to
                // purge the FieldCache (will hit NPE because core is
                // not assigned yet).
                this.origInstance = origInstance;
			}
Example #9
0
		/// <summary> Static shortcut that asks the segment info whether it has
		/// deletions.
		/// </summary>
		internal static bool HasDeletions(SegmentInfo si)
		{
			// Deliberately no ensureOpen() call here (it could affect performance)
			bool anyDeleted = si.HasDeletions();
			return anyDeleted;
		}
Example #10
0
        /// <summary> Test the term index: walks every term, checks that its
        /// postings have strictly increasing doc ids below <c>MaxDoc()</c>,
        /// positive freqs and non-decreasing positions, and that the number of
        /// live plus deleted docs seen equals the term's docFreq.
        /// </summary>
        /// <param name="info">segment being checked (not read by this method)</param>
        /// <param name="reader">reader opened on that segment</param>
        /// <returns>the collected status; any exception raised during the test
        /// is reported through <c>Msg</c> and stored in <c>status.error</c>
        /// rather than propagated</returns>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
        {
            Status.TermIndexStatus status = new Status.TermIndexStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                TermEnum termEnum = reader.Terms();
                TermPositions termPositions = reader.TermPositions();

                // Used only to count up # deleted docs for this term
                MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                int maxDoc = reader.MaxDoc();

                while (termEnum.Next())
                {
                    status.termCount++;
                    Term term = termEnum.Term();
                    int docFreq = termEnum.DocFreq();
                    termPositions.Seek(term);
                    int lastDoc = -1;
                    int freq0 = 0;
                    status.totFreq += docFreq;
                    while (termPositions.Next())
                    {
                        freq0++;
                        int doc = termPositions.Doc();
                        int freq = termPositions.Freq();
                        if (doc <= lastDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;
                        if (freq <= 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                        }

                        int lastPos = -1;
                        status.totPos += freq;
                        for (int j = 0; j < freq; j++)
                        {
                            int pos = termPositions.NextPosition();
                            if (pos < -1)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            }
                            if (pos < lastPos)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            }

                            // Remember this position; without it the ordering
                            // check above could never fire.
                            lastPos = pos;
                        }
                    }

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    if (reader.HasDeletions())
                    {
                        myTermDocs.Seek(term);
                        while (myTermDocs.Next())
                        {
                        }
                        delCount = myTermDocs.delCount;
                    }
                    else
                    {
                        delCount = 0;
                    }

                    if (freq0 + delCount != docFreq)
                    {
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                    }
                }

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            }
            catch (System.Exception e)
            {
                // Record the failure on the status object instead of rethrowing.
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return status;
        }
Example #11
0
		/// <summary>Flush all pending docs to a new segment. Runs under a lock
		/// on this instance. If anything inside the flush throws,
		/// <c>Abort()</c> is called before the exception propagates.
		/// </summary>
		/// <param name="closeDocStore">when true, <c>CloseDocStore()</c> is
		/// called first and <c>flushState.numDocsInStore</c> is reset to 0</param>
		/// <returns><c>flushState.numDocs</c></returns>
		internal int Flush(bool closeDocStore)
		{
			lock (this)
			{
				
				// Preconditions (checked in debug builds only)
				System.Diagnostics.Debug.Assert(AllThreadsIdle());
				
				System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
				
				System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
				System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				
				InitFlushState(false);
				
				docStoreOffset = numDocsInStore;
				
				if (infoStream != null)
					Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
				
				bool success = false;
				
				try
				{
					
					if (closeDocStore)
					{
						System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
						System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
						CloseDocStore();
						flushState.numDocsInStore = 0;
					}
					
					// Gather each thread state's consumer (keyed by itself) and hand
					// the collection to the top-level consumer's flush.
					System.Collections.Hashtable threads = new System.Collections.Hashtable();
					for (int i = 0; i < threadStates.Length; i++)
						threads[threadStates[i].consumer] = threadStates[i].consumer;
					consumer.Flush(threads, flushState);
					
					if (infoStream != null)
					{
						// Diagnostics only: compare the flushed segment's size with the RAM used.
                        SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                        long newSegmentSize = si.SizeInBytes();
						// NOTE(review): the last argument is already multiplied by 100.0 and is
						// formatted with "{3:%}" -- confirm the percent specifier does not scale it again.
                        System.String message = System.String.Format(nf, "  oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
                            new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
						Message(message);
					}
					
					flushedDocCount += flushState.numDocs;
					
					DoAfterFlush();
					
					success = true;
				}
				finally
				{
					// Any failure above aborts the flush.
					if (!success)
					{
						Abort();
					}
				}
				
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				
				return flushState.numDocs;
			}
		}
Example #12
0
        /// <summary>Returns a <see cref="Status"/> instance detailing
        /// the state of the index.
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check,
        /// or null to check every segment
        /// </param>
        /// <returns>the populated status. The method returns early, with the
        /// matching flag set on the result, when the segments file cannot be
        /// read or opened, its version cannot be read, or its format is newer
        /// than this tool understands.</returns>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis = new SegmentInfos();
            Status result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return result;
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput input = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return result;
            }

            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return result;
            }
            finally
            {
                // The input is only needed for the format int; always release it.
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool skip = false;

            // Map the format int to a label. This must be a single else-if chain:
            // as two separate statements, the label set for SegmentInfos.FORMAT
            // was always overwritten by the final fallback branch.
            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            else if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else if (format == SegmentInfos.FORMAT_CHECKSUM)
            {
                sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_DEL_COUNT)
            {
                sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_HAS_PROX)
            {
                sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
            }
            else if (format == SegmentInfos.FORMAT_USER_DATA)
            {
                sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
            }
            else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
            {
                sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
            }
            else if (format < SegmentInfos.CURRENT_FORMAT)
            {
                sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                skip = true;
            }
            else
            {
                sFormat = format + " [Lucene 1.3 or prior]";
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments = numSegments;
            result.segmentFormat = sFormat;
            result.userData = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }

                // One pass: echo each requested name and record it on the result.
                foreach (System.Object segmentName in onlySegments)
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + segmentName);
                    }
                    result.segmentsChecked.Add(segmentName);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return result;
            }

            // Start from an emptied copy of the segment infos; segments that
            // pass every test are added back below.
            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name = info.name;
                segInfoStat.docCount = info.docCount;

                // Docs counted as lost if this segment fails; narrowed to the
                // live doc count once the reader has been opened.
                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    // Any failure marks the whole segment as bad; it is not
                    // added to result.newSegments.
                    Msg("FAILED");
                    System.String comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return result;
        }
Example #13
0
		/// <summary> Returns true if a newly flushed (not from merge)
		/// segment should use the compound file format.
		/// </summary>
		/// <param name="segments">the segment infos passed in by the caller</param>
		/// <param name="newSegment">the newly flushed segment the decision is for</param>
		/// <returns>true to use the compound file format for <c>newSegment</c></returns>
		public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
Example #14
0
		/// <summary> Number of deleted docs for a segment. If the reader pool
		/// holds a reader for the segment, that reader is asked and then
		/// released back to the pool; otherwise the segment info's delCount is
		/// returned.
		/// </summary>
		/// <param name="info">the segment to query</param>
		/// <returns>the deleted-document count</returns>
		public virtual int NumDeletedDocs(SegmentInfo info)
		{
			SegmentReader pooled = readerPool.GetIfExists(info);
			if (pooled == null)
			{
				// No pooled reader: use the count recorded on the segment info.
				return info.GetDelCount();
			}

			try
			{
				return pooled.NumDeletedDocs();
			}
			finally
			{
				// Hand the reader obtained from GetIfExists back to the pool.
				readerPool.Release(pooled);
			}
		}
Example #15
0
			/// <summary> Looks up the reader mapped to <c>info</c>. When one is
			/// found its reference count is incremented before it is returned;
			/// otherwise null is returned.
			/// </summary>
			public virtual SegmentReader GetIfExists(SegmentInfo info)
			{
				lock (this)
				{
					SegmentReader existing = (SegmentReader) readerMap[info];
					if (existing == null)
					{
						return null;
					}

					// Returns a ref
					existing.IncRef();
					return existing;
				}
			}
Example #16
0
		/// <summary> Static shortcut that asks the segment info whether it uses
		/// the compound file format.
		/// </summary>
		internal static bool UsesCompoundFile(SegmentInfo si)
		{
			bool compound = si.GetUseCompoundFile();
			return compound;
		}
Example #17
0
		/// <summary> Opens a reader for <c>si</c> in the segment's own directory,
		/// using <c>BufferedIndexInput.BUFFER_SIZE</c> as the read buffer size
		/// and opening the doc stores, by delegating to the six-argument
		/// overload.
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor)
		{
			const bool doOpenStores = true;
			int bufferSize = BufferedIndexInput.BUFFER_SIZE;
			return Get(readOnly, si.dir, si, bufferSize, doOpenStores, termInfosIndexDivisor);
		}
Example #18
0
		/// <summary> Static shortcut that asks the segment info whether it has
		/// separate norms.
		/// </summary>
		internal static bool HasSeparateNorms(SegmentInfo si)
		{
			bool separateNorms = si.HasSeparateNorms();
			return separateNorms;
		}
Example #19
0
		/// <summary> Creates a segment reader of the configured implementation
		/// type (read-only or not), then opens its core readers, optionally its
		/// doc stores, its deleted docs and its norms. If any of the opening
		/// steps throws, <c>DoClose()</c> is called on the partially opened
		/// reader before the exception propagates.
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
		{
			SegmentReader segReader;
			try
			{
				if (!readOnly)
				{
					segReader = (SegmentReader) System.Activator.CreateInstance(IMPL);
				}
				else
				{
					segReader = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
				}
			}
			catch (System.Exception cause)
			{
				throw new System.SystemException("cannot load SegmentReader class: " + cause, cause);
			}

			segReader.readOnly = readOnly;
			segReader.si = si;
			segReader.readBufferSize = readBufferSize;

			bool opened = false;
			try
			{
				segReader.core = new CoreReaders(segReader, dir, si, readBufferSize, termInfosIndexDivisor);
				if (doOpenStores)
				{
					segReader.core.OpenDocStores(si);
				}
				segReader.LoadDeletedDocs();
				segReader.OpenNorms(segReader.core.cfsDir, readBufferSize);
				opened = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above.  In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!opened)
				{
					segReader.DoClose();
				}
			}

			return segReader;
		}
Example #20
0
		/// <summary>Replaces the segment info held by this instance.</summary>
		/// <param name="info">the segment info to store in <c>si</c>
		/// </param>
		internal virtual void  SetSegmentInfo(SegmentInfo info)
		{
			this.si = info;
		}
Example #21
0
		/// <summary> Test the term index.
		/// Walks every term of the reader and verifies, per term, that doc ids are
		/// strictly increasing and below maxDoc, that each freq is positive, that
		/// positions are not below -1 and never decrease within a document, and that
		/// the number of docs seen plus the number of deleted docs equals the term's
		/// docFreq.  Any exception is recorded in the returned status rather than
		/// rethrown.
		/// </summary>
		/// <param name="info">segment being checked (not read by this method)
		/// </param>
		/// <param name="reader">reader over the segment being checked
		/// </param>
		/// <returns> status holding the term/freq/position totals, with
		/// <c>error</c> set when a check failed
		/// </returns>
		private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
		{
			Status.TermIndexStatus status = new Status.TermIndexStatus();
			
			try
			{
				if (infoStream != null)
				{
					infoStream.Write("    test: terms, freq, prox...");
				}
				
				TermEnum termEnum = reader.Terms();
				TermPositions termPositions = reader.TermPositions();
				
				// Used only to count up # deleted docs for this term
				MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
				
				int maxDoc = reader.MaxDoc();
				
				while (termEnum.Next())
				{
					status.termCount++;
					Term term = termEnum.Term();
					int docFreq = termEnum.DocFreq();
					termPositions.Seek(term);
					int lastDoc = - 1;
					int freq0 = 0;
					status.totFreq += docFreq;
					while (termPositions.Next())
					{
						freq0++;
						int doc = termPositions.Doc();
						int freq = termPositions.Freq();
						if (doc <= lastDoc)
						{
							throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
						}
						if (doc >= maxDoc)
						{
							throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
						}
						
						lastDoc = doc;
						if (freq <= 0)
						{
							throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
						}
						
						int lastPos = - 1;
						status.totPos += freq;
						for (int j = 0; j < freq; j++)
						{
							int pos = termPositions.NextPosition();
							if (pos < - 1)
							{
								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
							}
							if (pos < lastPos)
							{
								throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
							}
							// Remember this position so the ordering check above compares
							// against the previous position rather than the initial -1.
							lastPos = pos;
						}
					}
					
					// Now count how many deleted docs occurred in
					// this term:
					int delCount;
					if (reader.HasDeletions())
					{
						myTermDocs.Seek(term);
						while (myTermDocs.Next())
						{
							// Iterating is enough: myTermDocs tallies delCount as it goes.
						}
						delCount = myTermDocs.delCount;
					}
					else
					{
						delCount = 0;
					}
					
					if (freq0 + delCount != docFreq)
					{
						throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
					}
				}
				
				Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
			}
			catch (System.Exception e)
			{
				// Report the failure and keep it on the status; the caller decides what to do.
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}
			
			return status;
		}
Example #22
0
		/// <summary>Snapshots the state that a failed commit would need to restore:
		/// a clone of the segment info, the change/dirty flags, the pending delete
		/// count, and the dirty flag of every norm.</summary>
		internal virtual void  StartCommit()
		{
			rollbackSegmentInfo = (SegmentInfo) si.Clone();
			rollbackHasChanges = hasChanges;
			rollbackDeletedDocsDirty = deletedDocsDirty;
			rollbackNormsDirty = normsDirty;
			rollbackPendingDeleteCount = pendingDeleteCount;

			// Each norm keeps its own rollback copy of the dirty flag.
			foreach (Norm current in norms.Values)
			{
				current.rollbackDirty = current.dirty;
			}
		}
Example #23
0
		/// <summary> Test term vectors for a segment.
		/// Loads the term frequency vectors of every non-deleted document, counting
		/// documents and vectors.  Any exception is recorded in the returned status
		/// rather than rethrown.
		/// </summary>
		/// <param name="info">segment whose <c>docCount</c> bounds the scan
		/// </param>
		/// <param name="reader">reader used to load the vectors
		/// </param>
		/// <param name="format">number format applied to the summary message
		/// </param>
		private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
		{
			Status.TermVectorStatus status = new Status.TermVectorStatus();

			try
			{
				if (infoStream != null)
				{
					infoStream.Write("    test: term vectors........");
				}

				for (int docId = 0; docId < info.docCount; ++docId)
				{
					if (reader.IsDeleted(docId))
					{
						continue;
					}

					status.docCount++;
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docId);
					if (vectors != null)
					{
						status.totVectors += vectors.Length;
					}
				}

				float avgVectors = ((float) status.totVectors) / status.docCount;
				object[] summaryArgs = new object[] { status.totVectors, avgVectors };
				Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", summaryArgs));
			}
			catch (System.Exception e)
			{
				// Report the failure and keep it on the status for the caller.
				Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
				status.error = e;
				if (infoStream != null)
				{
					infoStream.WriteLine(e.StackTrace);
				}
			}

			return status;
		}
Example #24
0
			// NOTE: only called from IndexWriter when a near
			// real-time reader is opened, or applyDeletes is run,
			// sharing a segment that's still being merged.  This
			// method is not fully thread safe, and relies on the
			// synchronization in IndexWriter.
			//
			// Creates the terms reader (tis) if it does not exist yet; does
			// nothing when one is already present.
			internal void  LoadTermsIndex(SegmentInfo si, int termsIndexDivisor)
			{
				lock (this)
				{
					if (tis != null)
					{
						return;
					}

					// Read from the plain directory unless the segment uses a compound file.
					Directory termsDir = dir;
					if (si.GetUseCompoundFile())
					{
						// In some cases we were originally opened when CFS was not
						// used, but by the time the terms reader with index is
						// requested the segment has switched to CFS.
						if (cfsReader == null)
						{
							cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
						}
						termsDir = cfsReader;
					}

					tis = new TermInfosReader(termsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
				}
			}
Example #25
0
		/// <summary>Builds a new SegmentInfo with the same name, doc count and directory,
		/// then copies the remaining state across.  The diagnostics map, the normGen
		/// array and the files list are duplicated into fresh containers, not shared.</summary>
		/// <returns> the copy, typed as object
		/// </returns>
		public System.Object Clone()
		{
			SegmentInfo copy = new SegmentInfo(name, docCount, dir);

			// Plain field-by-field state.
			copy.isCompoundFile = isCompoundFile;
			copy.delGen = delGen;
			copy.delCount = delCount;
			copy.hasProx = hasProx;
			copy.preLockless = preLockless;
			copy.hasSingleNormFile = hasSingleNormFile;
			copy.docStoreOffset = docStoreOffset;
			copy.docStoreSegment = docStoreSegment;
			copy.docStoreIsCompoundFile = docStoreIsCompoundFile;

			// Containers get their own instances so the copy can change independently.
			if (this.diagnostics != null)
			{
				copy.diagnostics = new System.Collections.Generic.Dictionary<string, string>();
				foreach (System.Collections.Generic.KeyValuePair<string, string> entry in diagnostics)
				{
					copy.diagnostics.Add(entry.Key, entry.Value);
				}
			}

			if (normGen != null)
			{
				copy.normGen = (long[]) normGen.Clone();
			}

			if (this.files != null)
			{
				copy.files = new System.Collections.Generic.List<string>();
				foreach (string fileName in files)
				{
					copy.files.Add(fileName);
				}
			}

			return copy;
		}
Example #26
0
			// Opens the stored-fields reader (and the term-vectors reader when the
			// field infos report vectors) for this segment.  Does nothing when the
			// stored-fields reader already exists.  Throws CorruptIndexException if
			// a private doc store disagrees with the segment info on doc count.
			internal void  OpenDocStores(SegmentInfo si)
			{
				lock (this)
				{
					System.Diagnostics.Debug.Assert(si.name.Equals(segment));

					if (fieldsReaderOrig != null)
					{
						return;
					}

					int docStoreOffset = si.GetDocStoreOffset();
					bool usesDocStoreSegment = docStoreOffset != - 1;

					// Decide which directory and which segment name hold the stores.
					Directory storeDir;
					System.String storesSegment;
					if (usesDocStoreSegment)
					{
						if (si.GetDocStoreIsCompoundFile())
						{
							System.Diagnostics.Debug.Assert(storeCFSReader == null);
							storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
							storeDir = storeCFSReader;
						}
						else
						{
							storeDir = dir;
						}
						storesSegment = si.GetDocStoreSegment();
					}
					else
					{
						if (si.GetUseCompoundFile())
						{
							// In some cases we were originally opened when CFS was not
							// used, but are asked to open doc stores after the segment
							// has switched to CFS.
							if (cfsReader == null)
							{
								cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
							}
							storeDir = cfsReader;
						}
						else
						{
							storeDir = dir;
						}
						storesSegment = segment;
					}
					System.Diagnostics.Debug.Assert(storeDir != null);

					fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, docStoreOffset, si.docCount);

					// Verify two sources of "maxDoc" agree:
					if (!usesDocStoreSegment && fieldsReaderOrig.Size() != si.docCount)
					{
						throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
					}

					if (fieldInfos.HasVectors())
					{
						// open term vector files only as needed
						termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, docStoreOffset, si.docCount);
					}
				}
			}
Example #27
0
		/// <summary>Returns the size of the given segment as a long.  The unit of
		/// measure is not fixed here; each concrete subclass supplies it.</summary>
		/// <param name="info">segment to measure
		/// </param>
		abstract protected internal long Size(SegmentInfo info);
Example #28
0
			// Flush all pending changes to the next generation
			// separate norms file.  If writing fails, the new file is
			// deleted on a best-effort basis and the original exception
			// propagates; the dirty flag is cleared only on success.
			public void  ReWrite(SegmentInfo si)
			{
				System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0), "refCount=" + refCount + " origNorm=" + origNorm);

				// NOTE: norms are re-written in regular directory, not cfs
				si.AdvanceNormGen(this.number);
				string targetFile = si.GetNormFileName(this.number);
				IndexOutput output = enclosingInstance.Directory().CreateOutput(targetFile);

				bool written = false;
				try
				{
					try
					{
						output.WriteBytes(bytes, enclosingInstance.MaxDoc());
					}
					finally
					{
						output.Close();
					}
					written = true;
				}
				finally
				{
					if (!written)
					{
						try
						{
							enclosingInstance.Directory().DeleteFile(targetFile);
						}
						catch (Exception)
						{
							// suppress this so we keep throwing the
							// original exception
						}
					}
				}

				this.dirty = false;
			}
Example #29
0
		/// <summary>Returns the segment's size in bytes.  When
		/// <c>calibrateSizeByDeletes</c> is set and the segment has a positive doc
		/// count, the byte size is scaled down by the fraction of deleted docs.</summary>
		/// <param name="info">segment to measure
		/// </param>
		protected internal virtual long SizeBytes(SegmentInfo info)
		{
			long rawBytes = info.SizeInBytes();
			if (!calibrateSizeByDeletes)
			{
				return rawBytes;
			}

			int deleted = writer.NumDeletedDocs(info);
			if (info.docCount <= 0)
			{
				// Nothing to pro-rate against.
				return rawBytes;
			}

			float delRatio = (float) deleted / (float) info.docCount;
			return (long) (rawBytes * (1.0f - delRatio));
		}
Example #30
0
		/// <summary>Convenience overload: opens a non-read-only reader in the segment's
		/// own directory (<c>si.dir</c>), with <c>BufferedIndexInput.BUFFER_SIZE</c> as
		/// the read buffer size, doc stores opened, and
		/// <c>IndexReader.DEFAULT_TERMS_INDEX_DIVISOR</c> as the terms index divisor.</summary>
		public static SegmentReader Get(SegmentInfo si)
		{
			Directory segmentDir = si.dir;
			int bufferSize = BufferedIndexInput.BUFFER_SIZE;
			int indexDivisor = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;
			return Get(false, segmentDir, si, bufferSize, true, indexDivisor);
		}
Example #31
0
		/// <summary>Measures a segment by the value <c>SizeDocs</c> returns for it.</summary>
		/// <param name="info">segment to measure
		/// </param>
		protected internal override long Size(SegmentInfo info)
		{
			long docBasedSize = SizeDocs(info);
			return docBasedSize;
		}
Example #32
0
			/// <summary> Obtain a SegmentReader from the readerPool.  The reader
			/// must be returned by calling <see cref="Release(SegmentReader)"/>.
			/// A reader already mapped to the segment is reused, with its doc stores
			/// and terms index opened on demand; otherwise a new reader is opened.
			/// </summary>
			/// <param name="info">segment to open a reader for
			/// </param>
			/// <param name="doOpenStores">whether the doc stores must be open on the returned reader
			/// </param>
			/// <param name="readBufferSize">requested read buffer size; replaced by
			/// <c>BufferedIndexInput.BUFFER_SIZE</c> when readers are being pooled
			/// </param>
			/// <param name="termsIndexDivisor">terms index divisor; -1 means a reused
			/// reader is not asked to load its terms index
			/// </param>
			/// <throws>  IOException </throws>
			public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor)
			{
				lock (this)
				{
					if (Enclosing_Instance.poolReaders)
					{
						readBufferSize = BufferedIndexInput.BUFFER_SIZE;
					}

					SegmentReader reader = (SegmentReader) readerMap[info];
					if (reader != null)
					{
						// Reuse the mapped reader, upgrading it as the caller requires.
						if (doOpenStores)
						{
							reader.OpenDocStores();
						}
						if (termsIndexDivisor != - 1 && !reader.TermsIndexLoaded())
						{
							// If this reader was originally opened because we
							// needed to merge it, we didn't load the terms
							// index.  But now the caller wants the terms index
							// (eg because it's doing deletes, or an NRT reader
							// is being opened), so ask the reader to load it.
							reader.LoadTermsIndex(termsIndexDivisor);
						}
					}
					else
					{
						// TODO: we may want to avoid doing this while
						// synchronized
						// Returns a ref, which we xfer to readerMap:
						reader = SegmentReader.Get(info, readBufferSize, doOpenStores, termsIndexDivisor);
						if (info.dir == enclosingInstance.directory)
						{
							// Only pool if reader is not external
							readerMap[info] = reader;
						}
					}

					// Return a ref to our caller
					if (info.dir == enclosingInstance.directory)
					{
						// Only incRef if we pooled (reader is not external)
						reader.IncRef();
					}
					return reader;
				}
			}