Information about a segment, such as its name, directory, and the files related to the segment.

NOTE: This API is new and still experimental (subject to change suddenly in the next release)

Inheritance: System.ICloneable
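The examples below come from several Lucene.NET versions, so the exact SegmentInfo API varies between them. As a rough, hedged orientation only (a sketch that mirrors the 4.x constructor used in Example #34 near the end; dir and codec stand in for real instances and are not defined here):

 // Sketch only -- follows the constructor call shown in Example #34 (Lucene.NET 4.x line).
 SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0",
                                  10000 /* docCount */, false /* isCompoundFile */,
                                  codec, null /* diagnostics */);
 string name  = si.Name;     // segment name, e.g. "_0"
 int docCount = si.DocCount; // number of documents in the segment
 Codec c      = si.Codec;    // codec that wrote the segment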
Example #1
		public override void SetUp()
		{
			base.SetUp();
			SetUpInternal();
			DocHelper.SetupDoc(testDoc);
			info = DocHelper.WriteDoc(dir, testDoc);
		}
Example #2
 /// <summary>
 /// Create a {@code SegmentReadState}. </summary>
 public SegmentReadState(Directory dir, SegmentInfo info, FieldInfos fieldInfos, IOContext context, int termsIndexDivisor, string segmentSuffix)
 {
     this.Directory = dir;
     this.SegmentInfo = info;
     this.FieldInfos = fieldInfos;
     this.Context = context;
     this.TermsIndexDivisor = termsIndexDivisor;
     this.SegmentSuffix = segmentSuffix;
 }
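A SegmentReadState like the one assembled above is what a codec hands to its PostingsFormat when a segment is opened for reading. A hedged sketch of that hand-off, modelled on the call in Example #34 below (dir, si, fieldInfos and context are placeholders for real instances):

 // Sketch only -- follows the call pattern in Example #34.
 var state = new SegmentReadState(dir, si, fieldInfos, context,
                                  DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR);
 using (FieldsProducer fields = codec.PostingsFormat.FieldsProducer(state))
 {
     Terms terms = fields.GetTerms("field"); // postings for one field of this segment
 }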
Example #3
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segment = si.Name;
            bool success = false;
            fieldInfos = fn;
            numDocs = si.DocCount;
            ChecksumIndexInput indexStream = null;
            try
            {
                // Load the index into memory
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
                version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
                {
                    indexStream.ReadVLong(); // the end of the data file
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
                    CodecUtil.CheckEOF(indexStream);
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
                vectorsStream = d.OpenInput(vectorsStreamFN, context);
                string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
                int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                if (version != version2)
                {
                    throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2);
                }
                Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.FilePointer);

                packedIntsVersion = vectorsStream.ReadVInt();
                chunkSize = vectorsStream.ReadVInt();
                decompressor = compressionMode.NewDecompressor();
                this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this, indexStream);
                }
            }
        }
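Both of the compressing constructors here and below derive their file names through IndexFileNames.SegmentFileName(segment, segmentSuffix, extension). As a hedged illustration of the naming scheme (the concrete extensions depend on the codec's constants, so the strings below are assumptions):

 // Illustration only; a non-empty suffix is inserted between the segment name and the extension.
 string tvx = IndexFileNames.SegmentFileName("_0", "", "tvx");           // "_0.tvx"
 string doc = IndexFileNames.SegmentFileName("_0", "Lucene41_0", "doc"); // "_0_Lucene41_0.doc"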
Example #4
        private int NumBufferedDocs; // docBase + numBufferedDocs == current doc ID

        /// <summary>
        /// Sole constructor. </summary>
        public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
        {
            Debug.Assert(directory != null);
            this.Directory = directory;
            this.Segment = si.Name;
            this.SegmentSuffix = segmentSuffix;
            this.CompressionMode = compressionMode;
            this.Compressor = compressionMode.NewCompressor();
            this.ChunkSize = chunkSize;
            this.DocBase = 0;
            this.BufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
            this.NumStoredFields = new int[16];
            this.EndOffsets = new int[16];
            this.NumBufferedDocs = 0;

            bool success = false;
            IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION), context);
            try
            {
                FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);

                string codecNameIdx = formatName + CODEC_SFX_IDX;
                string codecNameDat = formatName + CODEC_SFX_DAT;
                CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
                CodecUtil.WriteHeader(FieldsStream, codecNameDat, VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);
                Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);

                IndexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
                indexStream = null;

                FieldsStream.WriteVInt(chunkSize);
                FieldsStream.WriteVInt(PackedInts.VERSION_CURRENT);

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(indexStream);
                    Abort();
                }
            }
        }
Example #5
 // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
 public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
 {
     // validate incoming readers
     if (validate)
     {
         foreach (AtomicReader reader in readers)
         {
             reader.CheckIntegrity();
         }
     }
     MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     Directory = dir;
     this.TermIndexInterval = termIndexInterval;
     this.Codec = segmentInfo.Codec;
     this.Context = context;
     this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
     MergeState.SegmentInfo.DocCount = SetDocMaps();
 }
Example #6
		public void  Read(Directory directory)
		{
			
			IndexInput input = directory.OpenInput(IndexFileNames.SEGMENTS);
			try
			{
				int format = input.ReadInt();
				if (format < 0)
				{
					// file contains explicit format info
					// check that it is a format we can understand
					if (format < FORMAT)
						throw new System.IO.IOException("Unknown format version: " + format);
					version = input.ReadLong(); // read version
					counter = input.ReadInt(); // read counter
				}
				else
				{
					// file is in old format without explicit format info
					counter = format;
				}
				
				for (int i = input.ReadInt(); i > 0; i--)
				{
					// read segmentInfos
					SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory);
					Add(si);
				}
				
				if (format >= 0)
				{
					// in old format the version number may be at the end of the file
					if (input.GetFilePointer() >= input.Length())
						version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
					// old file format without version number
					else
						version = input.ReadLong(); // read version
				}
			}
			finally
			{
				input.Close();
			}
		}
Example #7
		/// <summary> Copy everything from src SegmentInfo into our instance.</summary>
		internal void  Reset(SegmentInfo src)
		{
			name = src.name;
			docCount = src.docCount;
			dir = src.dir;
			preLockless = src.preLockless;
			delGen = src.delGen;
			if (src.normGen == null)
			{
				normGen = null;
			}
			else
			{
				normGen = new long[src.normGen.Length];
				Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
			}
			isCompoundFile = src.isCompoundFile;
			hasSingleNormFile = src.hasSingleNormFile;
		}
Example #8
        // public static boolean DEBUG = false;
        /// <summary>
        /// Sole constructor. </summary>
        public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, string segmentSuffix)
        {
            bool success = false;
            IndexInput docIn = null;
            IndexInput posIn = null;
            IndexInput payIn = null;
            try
            {
                docIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), ioContext);
                Version = CodecUtil.CheckHeader(docIn, Lucene41PostingsWriter.DOC_CODEC, Lucene41PostingsWriter.VERSION_START, Lucene41PostingsWriter.VERSION_CURRENT);
                forUtil = new ForUtil(docIn);

                if (fieldInfos.HasProx())
                {
                    posIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), ioContext);
                    CodecUtil.CheckHeader(posIn, Lucene41PostingsWriter.POS_CODEC, Version, Version);

                    if (fieldInfos.HasPayloads() || fieldInfos.HasOffsets())
                    {
                        payIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), ioContext);
                        CodecUtil.CheckHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, Version, Version);
                    }
                }

                this.DocIn = docIn;
                this.PosIn = posIn;
                this.PayIn = payIn;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(docIn, posIn, payIn);
                }
            }
        }
Example #9
        /// <summary>
        /// Sole constructor.
        /// </summary>
        /// <param name="info">
        ///          <seealso cref="SegmentInfo"/> that we wrap </param>
        /// <param name="delCount">
        ///          number of deleted documents in this segment </param>
        /// <param name="delGen">
        ///          deletion generation number (used to name deletion files) </param>
        /// <param name="fieldInfosGen">
        ///          FieldInfos generation number (used to name field-infos files)
        ///  </param>
        public SegmentCommitInfo(SegmentInfo info, int delCount, long delGen, long fieldInfosGen)
        {
            this.Info = info;
            this.DelCount_Renamed = delCount;
            this.DelGen_Renamed = delGen;
            if (delGen == -1)
            {
                NextWriteDelGen = 1;
            }
            else
            {
                NextWriteDelGen = delGen + 1;
            }

            this.FieldInfosGen_Renamed = fieldInfosGen;
            if (fieldInfosGen == -1)
            {
                NextWriteFieldInfosGen = 1;
            }
            else
            {
                NextWriteFieldInfosGen = fieldInfosGen + 1;
            }
        }
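For a freshly flushed segment there are no deletions and no per-generation files yet, so the wrapper is typically created with a delCount of 0 and both generations set to -1, which makes the next write generations start at 1. A hedged sketch (not taken from the examples; info stands in for a real SegmentInfo):

 // Sketch only: a brand-new segment with no deletes and no field-infos updates yet.
 var commit = new SegmentCommitInfo(info, 0 /* delCount */, -1 /* delGen */, -1 /* fieldInfosGen */);
 // Per the constructor above, NextWriteDelGen and NextWriteFieldInfosGen both become 1.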
Example #10
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor)
		{
			return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
		}
Example #11
 internal static bool HasDeletions(SegmentInfo si)
 {
     return(si.dir.FileExists(si.name + ".del"));
 }
Example #12
 /// <summary>Returns true if this single info is optimized (has no
 /// pending norms or deletes, is in the same dir as the
 /// writer, and matches the current compound file setting).
 /// </summary>
 private bool IsOptimized(IndexWriter writer, SegmentInfo info)
 {
     return(!info.HasDeletions() && !info.HasSeparateNorms() && info.dir == writer.GetDirectory() && info.GetUseCompoundFile() == useCompoundFile);
 }
Example #13
        /// <summary>Checks if any merges are now necessary and returns a
        /// {@link MergePolicy.MergeSpecification} if so.  A merge
        /// is necessary when there are more than {@link
        /// #setMergeFactor} segments at a given level.  When
        /// multiple levels have too many segments, this method
        /// will return multiple merges, allowing the {@link
        /// MergeScheduler} to use concurrency.
        /// </summary>
        public override MergeSpecification FindMerges(SegmentInfos infos)
        {
            int numSegments = infos.Count;

            if (Verbose())
            {
                Message("findMerges: " + numSegments + " segments");
            }

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            float[] levels = new float[numSegments];
            float   norm   = (float)System.Math.Log(mergeFactor);

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                long        size = Size(info);

                // Floor tiny segments
                if (size < 1)
                {
                    size = 1;
                }
                levels[i] = (float)System.Math.Log(size) / norm;
            }

            float levelFloor;

            if (minMergeSize <= 0)
            {
                levelFloor = (float)0.0;
            }
            else
            {
                levelFloor = (float)(System.Math.Log(minMergeSize) / norm);
            }

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
             // LEVEL_LOG_SPAN of the max size, or, which has such a
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int start = 0;

            while (start < numSegments)
            {
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start];
                for (int i = 1 + start; i < numSegments; i++)
                {
                    float level = levels[i];
                    if (level > maxLevel)
                    {
                        maxLevel = level;
                    }
                }

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                if (maxLevel < levelFloor)
                {
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                }
                else
                {
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    {
                        levelBottom = levelFloor;
                    }
                }

                int upto = numSegments - 1;
                while (upto >= start)
                {
                    if (levels[upto] >= levelBottom)
                    {
                        break;
                    }
                    upto--;
                }
                if (Verbose())
                {
                    Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
                }

                // Finally, record all merges that are viable at this level:
                int end = start + mergeFactor;
                while (end <= 1 + upto)
                {
                    bool anyTooLarge = false;
                    for (int i = start; i < end; i++)
                    {
                        SegmentInfo info = infos.Info(i);
                        anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
                    }

                    if (!anyTooLarge)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        if (Verbose())
                        {
                            Message("    " + start + " to " + end + ": add this merge");
                        }
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
                    }
                    else if (Verbose())
                    {
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                    }

                    start = end;
                    end   = start + mergeFactor;
                }

                start = 1 + upto;
            }

            return(spec);
        }
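To make the level arithmetic above concrete: with the default mergeFactor of 10, a segment's level is log10 of its size, and LEVEL_LOG_SPAN (0.75 in LogMergePolicy) is how far below the current maximum a level reaches. A hedged worked example with made-up sizes:

 // Illustrative numbers only (assumes mergeFactor = 10 and LEVEL_LOG_SPAN = 0.75).
 float norm     = (float)System.Math.Log(10);
 float levelBig = (float)(System.Math.Log(1000000) / norm); // ~6.0 for a 1,000,000-byte segment
 float levelMid = (float)(System.Math.Log(200000) / norm);  // ~5.3, still >= 6.0 - 0.75
 float levelLow = (float)(System.Math.Log(10000) / norm);   // ~4.0, falls into a lower level
 // The first two segments share a level and are merge candidates; the third waits for its own level.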
Example #14
 public static SegmentReader Get(SegmentInfo si)
 {
     return(Get(si.dir, si, null, false, false));
 }
Example #15
		protected internal override long Size(SegmentInfo info)
		{
			return SizeDocs(info);
		}
Example #16
 public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
 {
     return(Get(si.dir, si, sis, closeDir, true));
 }
Example #17
        internal virtual SegmentReader ReopenSegment(SegmentInfo si)
        {
            lock (this)
            {
                bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
                bool normsUpToDate     = true;


                bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
                if (normsUpToDate)
                {
                    for (int i = 0; i < fieldInfos.Size(); i++)
                    {
                        if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
                        {
                            normsUpToDate        = false;
                            fieldNormsChanged[i] = true;
                        }
                    }
                }

                if (normsUpToDate && deletionsUpToDate)
                {
                    return(this);
                }


                // clone reader
                SegmentReader clone   = new SegmentReader();
                bool          success = false;
                try
                {
                    clone.directory      = directory;
                    clone.si             = si;
                    clone.segment        = segment;
                    clone.readBufferSize = readBufferSize;
                    clone.cfsReader      = cfsReader;
                    clone.storeCFSReader = storeCFSReader;

                    clone.fieldInfos            = fieldInfos;
                    clone.tis                   = tis;
                    clone.freqStream            = freqStream;
                    clone.proxStream            = proxStream;
                    clone.termVectorsReaderOrig = termVectorsReaderOrig;


                    // we have to open a new FieldsReader, because it is not thread-safe
                    // and can thus not be shared among multiple SegmentReaders
                    // TODO: Change this in case FieldsReader becomes thread-safe in the future
                    System.String fieldsSegment;

                    Directory storeDir = Directory();

                    if (si.GetDocStoreOffset() != -1)
                    {
                        fieldsSegment = si.GetDocStoreSegment();
                        if (storeCFSReader != null)
                        {
                            storeDir = storeCFSReader;
                        }
                    }
                    else
                    {
                        fieldsSegment = segment;
                        if (cfsReader != null)
                        {
                            storeDir = cfsReader;
                        }
                    }

                    if (fieldsReader != null)
                    {
                        clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                    }


                    if (!deletionsUpToDate)
                    {
                        // load deleted docs
                        clone.deletedDocs = null;
                        clone.LoadDeletedDocs();
                    }
                    else
                    {
                        clone.deletedDocs = this.deletedDocs;
                    }

                    clone.norms = new System.Collections.Hashtable();
                    if (!normsUpToDate)
                    {
                        // load norms
                        for (int i = 0; i < fieldNormsChanged.Length; i++)
                        {
                            // copy unchanged norms to the cloned reader and incRef those norms
                            if (!fieldNormsChanged[i])
                            {
                                System.String curField = fieldInfos.FieldInfo(i).name;
                                Norm          norm     = (Norm)this.norms[curField];
                                norm.IncRef();
                                clone.norms[curField] = norm;
                            }
                        }

                        clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
                    }
                    else
                    {
                        System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                        while (it.MoveNext())
                        {
                            System.String field = (System.String)it.Current;
                            Norm          norm  = (Norm)norms[field];
                            norm.IncRef();
                            clone.norms[field] = norm;
                        }
                    }

                    if (clone.singleNormStream == null)
                    {
                        for (int i = 0; i < fieldInfos.Size(); i++)
                        {
                            FieldInfo fi = fieldInfos.FieldInfo(i);
                            if (fi.isIndexed && !fi.omitNorms)
                            {
                                Directory     d        = si.GetUseCompoundFile() ? cfsReader : Directory();
                                System.String fileName = si.GetNormFileName(fi.number);
                                if (si.HasSeparateNorms(fi.number))
                                {
                                    continue;
                                }

                                if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
                                {
                                    clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                                    break;
                                }
                            }
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (this.referencedSegmentReader != null)
                    {
                        // this reader shares resources with another SegmentReader,
                        // so we increment the other readers refCount. We don't
                        // increment the refCount of the norms because we did
                        // that already for the shared norms
                        clone.referencedSegmentReader = this.referencedSegmentReader;
                        referencedSegmentReader.IncRefReaderNotNorms();
                    }
                    else
                    {
                        // this reader wasn't reopened, so we increment this
                        // readers refCount
                        clone.referencedSegmentReader = this;
                        IncRefReaderNotNorms();
                    }

                    if (!success)
                    {
                        // An exception occurred during reopen, so we have to decRef the norms
                        // that we incRef'ed already and close singleNormStream and the FieldsReader
                        clone.DecRef();
                    }
                }

                return(clone);
            }
        }
Example #18
        private void  Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
        {
            segment             = si.name;
            this.si             = si;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                // Use compound file directory for some files, if it exists
                Directory cfsDir = Directory();
                if (si.GetUseCompoundFile())
                {
                    cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                    cfsDir    = cfsReader;
                }

                Directory storeDir;

                if (doOpenStores)
                {
                    if (si.GetDocStoreOffset() != -1)
                    {
                        if (si.GetDocStoreIsCompoundFile())
                        {
                            storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                            storeDir       = storeCFSReader;
                        }
                        else
                        {
                            storeDir = Directory();
                        }
                    }
                    else
                    {
                        storeDir = cfsDir;
                    }
                }
                else
                {
                    storeDir = null;
                }

                // No compound file exists - use the multi-file format
                fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

                System.String fieldsSegment;

                if (si.GetDocStoreOffset() != -1)
                {
                    fieldsSegment = si.GetDocStoreSegment();
                }
                else
                {
                    fieldsSegment = segment;
                }

                if (doOpenStores)
                {
                    fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

                    // Verify two sources of "maxDoc" agree:
                    if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
                    }
                }

                tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

                LoadDeletedDocs();

                // make sure that all index files have been read or are kept open
                // so that if an index update removes them we'll still have them
                freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
                proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
                OpenNorms(cfsDir, readBufferSize);

                if (doOpenStores && fieldInfos.HasVectors())
                {
                    // open term vector files only as needed
                    System.String vectorsSegment;
                    if (si.GetDocStoreOffset() != -1)
                    {
                        vectorsSegment = si.GetDocStoreSegment();
                    }
                    else
                    {
                        vectorsSegment = segment;
                    }
                    termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    DoClose();
                }
            }
        }
Example #19
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir, int readBufferSize)
 {
     return(Get(dir, si, sis, closeDir, ownDir, readBufferSize, true));
 }
Example #20
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
 {
     return(Get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true));
 }
Example #21
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores)
 {
     return(Get(si.dir, si, null, false, false, readBufferSize, doOpenStores));
 }
Example #22
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 public static SegmentReader Get(SegmentInfo si, int readBufferSize)
 {
     return(Get(si.dir, si, null, false, false, readBufferSize, true));
 }
Example #23
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 internal static SegmentReader Get(SegmentInfo si, bool doOpenStores)
 {
     return(Get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores));
 }
Example #24
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
		{
			SegmentReader instance;
			try
			{
				if (readOnly)
					instance = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
				else
					instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("cannot load SegmentReader class: " + e, e);
			}
			instance.readOnly = readOnly;
			instance.si = si;
			instance.readBufferSize = readBufferSize;
			
			bool success = false;
			
			try
			{
				instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
				if (doOpenStores)
				{
					instance.core.OpenDocStores(si);
				}
				instance.LoadDeletedDocs();
				instance.OpenNorms(instance.core.cfsDir, readBufferSize);
				success = true;
			}
			finally
			{
				
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above.  In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					instance.DoClose();
				}
			}
			return instance;
		}
Example #25
		/// <summary>Flush all pending docs to a new segment </summary>
		internal int Flush(bool closeDocStore)
		{
			lock (this)
			{
				
				System.Diagnostics.Debug.Assert(AllThreadsIdle());
				
				System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
				
				System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
				System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				
				InitFlushState(false);
				
				docStoreOffset = numDocsInStore;
				
				if (infoStream != null)
					Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
				
				bool success = false;
				
				try
				{
					
					if (closeDocStore)
					{
						System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
						System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
						CloseDocStore();
						flushState.numDocsInStore = 0;
					}
					
					System.Collections.Hashtable threads = new System.Collections.Hashtable();
					for (int i = 0; i < threadStates.Length; i++)
						threads[threadStates[i].consumer] = threadStates[i].consumer;
					consumer.Flush(threads, flushState);
					
					if (infoStream != null)
					{
                        SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                        long newSegmentSize = si.SizeInBytes();
                        System.String message = System.String.Format(nf, "  oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
                            new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
						Message(message);
					}
					
					flushedDocCount += flushState.numDocs;
					
					DoAfterFlush();
					
					success = true;
				}
				finally
				{
					if (!success)
					{
						Abort();
					}
				}
				
				System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
				
				return flushState.numDocs;
			}
		}
Example #26
 internal static bool HasDeletions(SegmentInfo si)
 {
     // Don't call ensureOpen() here (it could affect performance)
     return(si.HasDeletions());
 }
Example #27
 /// <summary> Test stored fields for a segment.</summary>
 private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
 {
     var status = new Status.StoredFieldStatus();
     
     try
     {
         if (infoStream != null)
         {
             infoStream.Write("    test: stored fields.......");
         }
         
         // Scan stored fields for all documents
         for (int j = 0; j < info.docCount; ++j)
         {
             if (!reader.IsDeleted(j))
             {
                 status.docCount++;
                 Document doc = reader.Document(j);
                 status.totFields += doc.GetFields().Count;
             }
         }
         
         // Validate docCount
         if (status.docCount != reader.NumDocs())
         {
             throw new System.SystemException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
         }
         
         Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
     }
     catch (System.Exception e)
     {
         Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
         status.error = e;
         if (infoStream != null)
         {
             infoStream.WriteLine(e.StackTrace);
         }
     }
     
     return status;
 }
Example #28
 internal static bool UsesCompoundFile(SegmentInfo si)
 {
     return(si.GetUseCompoundFile());
 }
Example #29
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 public static SegmentReader Get(SegmentInfo si)
 {
     return(Get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true));
 }
Example #30
 internal static bool HasSeparateNorms(SegmentInfo si)
 {
     return(si.HasSeparateNorms());
 }
Example #31
        /// <summary> Finds merges necessary to expunge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
        {
            int numSegments = segmentInfos.Count;

            if (Verbose())
            {
                Message("findMergesToExpungeDeletes: " + numSegments + " segments");
            }

            MergeSpecification spec       = new MergeSpecification();
            int firstSegmentWithDeletions = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info     = segmentInfos.Info(i);
                int         delCount = writer.NumDeletedDocs(info);
                if (delCount > 0)
                {
                    if (Verbose())
                    {
                        Message("  segment " + info.name + " has deletions");
                    }
                    if (firstSegmentWithDeletions == -1)
                    {
                        firstSegmentWithDeletions = i;
                    }
                    else if (i - firstSegmentWithDeletions == mergeFactor)
                    {
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        if (Verbose())
                        {
                            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        }
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                        firstSegmentWithDeletions = i;
                    }
                }
                else if (firstSegmentWithDeletions != -1)
                {
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    if (Verbose())
                    {
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    }
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                    firstSegmentWithDeletions = -1;
                }
            }

            if (firstSegmentWithDeletions != -1)
            {
                if (Verbose())
                {
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                }
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
            }

            return(spec);
        }
Example #32
 public override void  SetUp()
 {
     base.SetUp();
     DocHelper.SetupDoc(testDoc);
     info = DocHelper.WriteDoc(dir, testDoc);
 }
Example #33
        /// <summary>Returns a {@link Status} instance detailing
        /// the state of the index.
        ///
        /// </summary>
        /// <param name="onlySegments">list of specific segment names to check
        ///
        /// <p/>As this method checks every byte in the specified
        /// segments, on a large index it can take quite a long
        /// time to run.
        ///
        /// <p/><b>WARNING</b>: make sure
        /// you only call this when the index is not opened by any
        /// writer.
        /// </param>
        public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
        {
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
            SegmentInfos sis    = new SegmentInfos();
            Status       result = new Status();

            result.dir = dir;
            try
            {
                sis.Read(dir);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read any segments file in directory");
                result.missingSegments = true;
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                return(result);
            }

            int numSegments = sis.Count;

            System.String segmentsFileName = sis.GetCurrentSegmentFileName();
            IndexInput    input            = null;

            try
            {
                input = dir.OpenInput(segmentsFileName);
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not open segments file in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.cantOpenSegments = true;
                return(result);
            }
            int format = 0;

            try
            {
                format = input.ReadInt();
            }
            catch (System.Exception t)
            {
                Msg("ERROR: could not read segment file version in directory");
                if (infoStream != null)
                {
                    infoStream.WriteLine(t.StackTrace);
                }
                result.missingSegmentVersion = true;
                return(result);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
            }

            System.String sFormat = "";
            bool          skip    = false;

            if (format == SegmentInfos.FORMAT)
            {
                sFormat = "FORMAT [Lucene Pre-2.1]";
            }
            if (format == SegmentInfos.FORMAT_LOCKLESS)
            {
                sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
            }
            else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            {
                sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
            }
            else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            {
                sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
            }
            else
            {
                if (format == SegmentInfos.FORMAT_CHECKSUM)
                {
                    sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_DEL_COUNT)
                {
                    sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_HAS_PROX)
                {
                    sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
                }
                else if (format == SegmentInfos.FORMAT_USER_DATA)
                {
                    sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
                }
                else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
                {
                    sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
                }
                else if (format < SegmentInfos.CURRENT_FORMAT)
                {
                    sFormat = "int=" + format + " [newer version of Lucene than this tool]";
                    skip    = true;
                }
                else
                {
                    sFormat = format + " [Lucene 1.3 or prior]";
                }
            }

            result.segmentsFileName = segmentsFileName;
            result.numSegments      = numSegments;
            result.segmentFormat    = sFormat;
            result.userData         = sis.GetUserData();
            System.String userDataString;
            if (sis.GetUserData().Count > 0)
            {
                userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
            }
            else
            {
                userDataString = "";
            }

            Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

            if (onlySegments != null)
            {
                result.partial = true;
                if (infoStream != null)
                {
                    infoStream.Write("\nChecking only these segments:");
                }
                System.Collections.IEnumerator it = onlySegments.GetEnumerator();
                while (it.MoveNext())
                {
                    if (infoStream != null)
                    {
                        infoStream.Write(" " + it.Current);
                    }
                }
                System.Collections.IEnumerator e = onlySegments.GetEnumerator();
                while (e.MoveNext() == true)
                {
                    result.segmentsChecked.Add(e.Current);
                }
                Msg(":");
            }

            if (skip)
            {
                Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
                result.toolOutOfDate = true;
                return(result);
            }


            result.newSegments = (SegmentInfos)sis.Clone();
            result.newSegments.Clear();

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = sis.Info(i);
                if (onlySegments != null && !onlySegments.Contains(info.name))
                {
                    continue;
                }
                Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
                result.segmentInfos.Add(segInfoStat);
                Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
                segInfoStat.name     = info.name;
                segInfoStat.docCount = info.docCount;

                int toLoseDocCount = info.docCount;

                SegmentReader reader = null;

                try
                {
                    Msg("    compound=" + info.GetUseCompoundFile());
                    segInfoStat.compound = info.GetUseCompoundFile();
                    Msg("    hasProx=" + info.GetHasProx());
                    segInfoStat.hasProx = info.GetHasProx();
                    Msg("    numFiles=" + info.Files().Count);
                    segInfoStat.numFiles = info.Files().Count;
                    Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
                    segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics();
                    segInfoStat.diagnostics = diagnostics;
                    if (diagnostics.Count > 0)
                    {
                        Msg("    diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
                    }

                    int docStoreOffset = info.GetDocStoreOffset();
                    if (docStoreOffset != -1)
                    {
                        Msg("    docStoreOffset=" + docStoreOffset);
                        segInfoStat.docStoreOffset = docStoreOffset;
                        Msg("    docStoreSegment=" + info.GetDocStoreSegment());
                        segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                        Msg("    docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                        segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
                    }
                    System.String delFileName = info.GetDelFileName();
                    if (delFileName == null)
                    {
                        Msg("    no deletions");
                        segInfoStat.hasDeletions = false;
                    }
                    else
                    {
                        Msg("    has deletions [delFileName=" + delFileName + "]");
                        segInfoStat.hasDeletions      = true;
                        segInfoStat.deletionsFileName = delFileName;
                    }
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: open reader.........");
                    }
                    reader = SegmentReader.Get(info);

                    segInfoStat.openReaderPassed = true;

                    int numDocs = reader.NumDocs();
                    toLoseDocCount = numDocs;
                    if (reader.HasDeletions())
                    {
                        if (reader.deletedDocs.Count() != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (reader.deletedDocs.Count() > reader.MaxDoc())
                        {
                            throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                        }
                        if (info.docCount - numDocs != info.GetDelCount())
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        segInfoStat.numDeleted = info.docCount - numDocs;
                        Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                    }
                    else
                    {
                        if (info.GetDelCount() != 0)
                        {
                            throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                        }
                        Msg("OK");
                    }
                    if (reader.MaxDoc() != info.docCount)
                    {
                        throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
                    }

                    // Test getFieldNames()
                    if (infoStream != null)
                    {
                        infoStream.Write("    test: fields..............");
                    }
                    System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
                    Msg("OK [" + fieldNames.Count + " fields]");
                    segInfoStat.numFields = fieldNames.Count;

                    // Test Field Norms
                    segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

                    // Test the Term Index
                    segInfoStat.termIndexStatus = TestTermIndex(info, reader);

                    // Test Stored Fields
                    segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

                    // Test Term Vectors
                    segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

                    // Rethrow the first exception we encountered
                    //  This will cause stats for failed segments to be incremented properly
                    if (segInfoStat.fieldNormStatus.error != null)
                    {
                        throw new System.SystemException("Field Norm test failed");
                    }
                    else if (segInfoStat.termIndexStatus.error != null)
                    {
                        throw new System.SystemException("Term Index test failed");
                    }
                    else if (segInfoStat.storedFieldStatus.error != null)
                    {
                        throw new System.SystemException("Stored Field test failed");
                    }
                    else if (segInfoStat.termVectorStatus.error != null)
                    {
                        throw new System.SystemException("Term Vector test failed");
                    }

                    Msg("");
                }
                catch (System.Exception t)
                {
                    Msg("FAILED");
                    System.String comment;
                    comment = "fixIndex() would remove reference to this segment";
                    Msg("    WARNING: " + comment + "; full exception:");
                    if (infoStream != null)
                    {
                        infoStream.WriteLine(t.StackTrace);
                    }
                    Msg("");
                    result.totLoseDocCount += toLoseDocCount;
                    result.numBadSegments++;
                    continue;
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }

                // Keeper
                result.newSegments.Add(info.Clone());
            }

            if (0 == result.numBadSegments)
            {
                result.clean = true;
                Msg("No problems were detected with this index.\n");
            }
            else
            {
                Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
            }

            return(result);
        }
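For completeness, a hedged sketch of driving this checker end to end (the CheckIndex(Directory) constructor is an assumption about this Lucene.NET line; pass null to check every segment, or a list of segment names to restrict the scan as described in the summary above):

 // Sketch only; the Status field names follow the code above.
 CheckIndex checker = new CheckIndex(dir);
 CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null); // null => check all segments
 if (!status.clean)
 {
     // status.numBadSegments and status.totLoseDocCount describe what fixIndex() would drop.
 }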
Example #34
        public virtual void TestFixedPostings()
        {
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
            {
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);
            }

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    IEnumerator <string> fieldsEnum = reader.GetEnumerator();
                    fieldsEnum.MoveNext();
                    string fieldName = fieldsEnum.Current;
                    Assert.IsNotNull(fieldName);
                    Terms terms2 = reader.GetTerms(fieldName);
                    Assert.IsNotNull(terms2);

                    TermsEnum termsEnum = terms2.GetEnumerator();

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        Assert.IsTrue(termsEnum.MoveNext());
                        BytesRef term = termsEnum.Term;
                        Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                        {
                            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                            Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                        }
                    }
                    Assert.IsFalse(termsEnum.MoveNext());

                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
                    }

                    Assert.IsFalse(fieldsEnum.MoveNext());
                }
            }
        }
Example #35
 internal static bool UsesCompoundFile(SegmentInfo si)
 {
     return(si.dir.FileExists(si.name + ".cfs"));
 }
Example #36
        /// <summary> Test the term index.</summary>
        private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
        {
            Status.TermIndexStatus status = new Status.TermIndexStatus();

            try
            {
                if (infoStream != null)
                {
                    infoStream.Write("    test: terms, freq, prox...");
                }

                TermEnum      termEnum      = reader.Terms();
                TermPositions termPositions = reader.TermPositions();

                // Used only to count up # deleted docs for this term
                MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

                int maxDoc = reader.MaxDoc();

                while (termEnum.Next())
                {
                    status.termCount++;
                    Term term    = termEnum.Term();
                    int  docFreq = termEnum.DocFreq();
                    termPositions.Seek(term);
                    int lastDoc = -1;
                    int freq0   = 0;
                    status.totFreq += docFreq;
                    while (termPositions.Next())
                    {
                        freq0++;
                        int doc  = termPositions.Doc();
                        int freq = termPositions.Freq();
                        if (doc <= lastDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                        }
                        if (doc >= maxDoc)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                        }

                        lastDoc = doc;
                        if (freq <= 0)
                        {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                        }

                        int lastPos = -1;
                        status.totPos += freq;
                        for (int j = 0; j < freq; j++)
                        {
                            int pos = termPositions.NextPosition();
                            if (pos < -1)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                            }
                            if (pos < lastPos)
                            {
                                throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                            }
                            lastPos = pos;
                        }
                    }

                    // Now count how many deleted docs occurred in
                    // this term:
                    int delCount;
                    if (reader.HasDeletions())
                    {
                        myTermDocs.Seek(term);
                        while (myTermDocs.Next())
                        {
                        }
                        delCount = myTermDocs.delCount;
                    }
                    else
                    {
                        delCount = 0;
                    }

                    if (freq0 + delCount != docFreq)
                    {
                        throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                    }
                }

                Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
            }
            catch (System.Exception e)
            {
                Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
                status.error = e;
                if (infoStream != null)
                {
                    infoStream.WriteLine(e.StackTrace);
                }
            }

            return(status);
        }
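The checks above reduce to a few postings invariants: doc IDs for a term are strictly increasing and below maxDoc, every frequency is positive, and positions within a document are at least -1 and non-decreasing. A minimal, self-contained sketch of those invariants over plain arrays (a hypothetical helper, not part of CheckIndex):

        // Hypothetical, standalone restatement of the postings invariants verified above.
        private static void ValidatePostings(int[] docs, int[][] positions, int maxDoc)
        {
            int lastDoc = -1;
            for (int i = 0; i < docs.Length; i++)
            {
                int doc = docs[i];
                if (doc <= lastDoc || doc >= maxDoc)
                {
                    throw new System.SystemException("doc " + doc + " out of order or >= maxDoc " + maxDoc);
                }
                lastDoc = doc;

                int lastPos = -1;
                foreach (int pos in positions[i])
                {
                    if (pos < -1 || pos < lastPos)
                    {
                        throw new System.SystemException("pos " + pos + " out of bounds or < lastPos " + lastPos);
                    }
                    lastPos = pos;
                }
            }
        }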
Example #37
		public static SegmentReader Get(SegmentInfo si)
		{
			return Get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
		}
Example #38
        /// <summary>Returns true if this single info is optimized (has no
        /// pending norms or deletes, is in the same dir as the
        /// writer, and matches the current compound file setting).
        /// </summary>
        private bool IsOptimized(SegmentInfo info)
        {
            bool hasDeletions = writer.NumDeletedDocs(info) > 0;

            return(!hasDeletions && !info.HasSeparateNorms() && info.dir == writer.GetDirectory() && info.GetUseCompoundFile() == useCompoundFile);
        }
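By this definition the index as a whole is optimized only when a single segment remains and that segment passes the check; a minimal wrapper sketch (the helper name is assumed, not from the source):

        // Hypothetical helper built on IsOptimized above.
        private bool IsIndexOptimized(SegmentInfos infos)
        {
            return infos.Count == 1 && IsOptimized(infos.Info(0));
        }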
Example #39
		internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
		{
			return Get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
		}
Example #40
		public static SegmentReader Get(SegmentInfo si)
		{
			return Get(si.dir, si, null, false, false);
		}
Example #41
		internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
		{
			lock (this)
			{
				bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
				bool normsUpToDate = true;
				
				bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
				int fieldCount = core.fieldInfos.Size();
				for (int i = 0; i < fieldCount; i++)
				{
					if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
					{
						normsUpToDate = false;
						fieldNormsChanged[i] = true;
					}
				}
				
				// if we're cloning we need to run through the reopenSegment logic
				// also if both old and new readers aren't readonly, we clone to avoid sharing modifications
				if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly)
				{
					return this;
				}
				
				// When cloning, the incoming SegmentInfos should not
				// have any changes in it:
				System.Diagnostics.Debug.Assert(!doClone ||(normsUpToDate && deletionsUpToDate));
				
				// clone reader
				SegmentReader clone;
				try
				{
					if (openReadOnly)
						clone = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
					else
						clone = (SegmentReader) System.Activator.CreateInstance(IMPL);
				}
				catch (System.Exception e)
				{
					throw new System.SystemException("cannot load SegmentReader class: " + e, e);
				}
				
				bool success = false;
				try
				{
					core.IncRef();
					clone.core = core;
					clone.readOnly = openReadOnly;
					clone.si = si;
					clone.readBufferSize = readBufferSize;
					
					if (!openReadOnly && hasChanges)
					{
						// My pending changes transfer to the new reader
						clone.pendingDeleteCount = pendingDeleteCount;
						clone.deletedDocsDirty = deletedDocsDirty;
						clone.normsDirty = normsDirty;
						clone.hasChanges = hasChanges;
						hasChanges = false;
					}
					
					if (doClone)
					{
						if (deletedDocs != null)
						{
							deletedDocsRef.IncRef();
							clone.deletedDocs = deletedDocs;
							clone.deletedDocsRef = deletedDocsRef;
						}
					}
					else
					{
						if (!deletionsUpToDate)
						{
							// load deleted docs
							System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
							clone.LoadDeletedDocs();
						}
						else if (deletedDocs != null)
						{
							deletedDocsRef.IncRef();
							clone.deletedDocs = deletedDocs;
							clone.deletedDocsRef = deletedDocsRef;
						}
					}
					
					clone.SetDisableFakeNorms(GetDisableFakeNorms());
					clone.norms = new System.Collections.Hashtable();
					
					// Clone norms
					for (int i = 0; i < fieldNormsChanged.Length; i++)
					{
						
						// Clone unchanged norms to the cloned reader
						if (doClone || !fieldNormsChanged[i])
						{
							System.String curField = core.fieldInfos.FieldInfo(i).name;
							Norm norm = (Norm) this.norms[curField];
							if (norm != null)
								clone.norms[curField] = norm.Clone();
						}
					}
					
					// If we are not cloning, then this will open anew
					// any norms that have changed:
					clone.OpenNorms(si.GetUseCompoundFile()?core.GetCFSReader():Directory(), readBufferSize);
					
					success = true;
				}
				finally
				{
					if (!success)
					{
						// An exception occurred during reopen, we have to decRef the norms
						// that we incRef'ed already and close singleNormsStream and FieldsReader
						clone.DecRef();
					}
				}
				
				return clone;
			}
		}
Example #42
		public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
		{
			SegmentReader instance;
			try
			{
				instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("cannot load SegmentReader class: " + e, e);
			}
			instance.Init(dir, sis, closeDir, ownDir);
			instance.Initialize(si);
			return instance;
		}
		// got the idea for this from George's note in TestSegmentReader
		// it seems that JUnit creates a new instance of the class for each test invocation
		// while NUnit does not (seems like a flaw in JUnit, to be honest)
		// forcing the re-init of the variables for each run solves the problem
		private void SetUpInternal()
		{
			dir = new RAMDirectory();
			testDoc = new Lucene.Net.Documents.Document();
			info = null;
		}
Example #44
		internal static bool HasDeletions(SegmentInfo si)
		{
			return si.HasDeletions();
		}
Example #45
 /// <summary> Test the term index.</summary>
 private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
 {
     var status = new Status.TermIndexStatus();
     
     try
     {
         if (infoStream != null)
         {
             infoStream.Write("    test: terms, freq, prox...");
         }
         
         TermEnum termEnum = reader.Terms();
         TermPositions termPositions = reader.TermPositions();
         
         // Used only to count up # deleted docs for this term
         var myTermDocs = new MySegmentTermDocs(reader);
         
         int maxDoc = reader.MaxDoc;
         
         while (termEnum.Next())
         {
             status.termCount++;
             Term term = termEnum.Term;
             int docFreq = termEnum.DocFreq();
             termPositions.Seek(term);
             int lastDoc = - 1;
             int freq0 = 0;
             status.totFreq += docFreq;
             while (termPositions.Next())
             {
                 freq0++;
                 int doc = termPositions.Doc;
                 int freq = termPositions.Freq;
                 if (doc <= lastDoc)
                 {
                     throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                 }
                 if (doc >= maxDoc)
                 {
                     throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                 }
                 
                 lastDoc = doc;
                 if (freq <= 0)
                 {
                     throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                 }
                 
                 int lastPos = - 1;
                 status.totPos += freq;
                 for (int j = 0; j < freq; j++)
                 {
                     int pos = termPositions.NextPosition();
                     if (pos < - 1)
                     {
                         throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                     }
                     if (pos < lastPos)
                     {
                         throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                     }
                     lastPos = pos;
                 }
             }
             
             // Now count how many deleted docs occurred in
             // this term:
             int delCount;
             if (reader.HasDeletions)
             {
                 myTermDocs.Seek(term);
                 while (myTermDocs.Next())
                 {
                 }
                 delCount = myTermDocs.delCount;
             }
             else
             {
                 delCount = 0;
             }
             
             if (freq0 + delCount != docFreq)
             {
                 throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
             }
         }
         
         Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
     }
     catch (System.Exception e)
     {
         Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
         status.error = e;
         if (infoStream != null)
         {
             infoStream.WriteLine(e.StackTrace);
         }
     }
     
     return status;
 }
Example #46
		internal static bool HasSeparateNorms(SegmentInfo si)
		{
			return si.HasSeparateNorms();
		}
Example #47
 /// <summary> Test term vectors for a segment.</summary>
 private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
 {
     var status = new Status.TermVectorStatus();
     
     try
     {
         if (infoStream != null)
         {
             infoStream.Write("    test: term vectors........");
         }
         
         for (int j = 0; j < info.docCount; ++j)
         {
             if (!reader.IsDeleted(j))
             {
                 status.docCount++;
                 ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                 if (tfv != null)
                 {
                     status.totVectors += tfv.Length;
                 }
             }
         }
         
         Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
     }
     catch (System.Exception e)
     {
         Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
         status.error = e;
         if (infoStream != null)
         {
             infoStream.WriteLine(e.StackTrace);
         }
     }
     
     return status;
 }
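The average reported above is totVectors divided by the number of live documents, cast to float before dividing so the result is not truncated. A tiny sketch of that computation (hypothetical helper; the zero-document guard is an addition, not in the example above):

        // Hypothetical illustration of the average computed above.
        private static float AverageVectorFields(int totVectors, int docCount)
        {
            return docCount == 0 ? 0f : (float) totVectors / docCount;
        }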
Example #48
			internal void  ReWrite(SegmentInfo si)
			{
				// NOTE: norms are re-written in regular directory, not cfs
				
				System.String oldFileName = si.GetNormFileName(this.number);
				if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
				{
					// Mark this file for deletion.  Note that we don't
					// actually try to delete it until the new segments files is
					// successfully written:
					Enclosing_Instance.deleter.AddPendingFile(oldFileName);
				}
				
				si.AdvanceNormGen(this.number);
				IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number));
				try
				{
					out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
				}
				finally
				{
					out_Renamed.Close();
				}
				this.dirty = false;
			}
Example #49
 protected internal override long Size(SegmentInfo info, IState state)
 {
     return(SizeDocs(info, state));
 }
Example #50
 internal virtual void  SetSegmentInfo(SegmentInfo info)
 {
     si = info;
 }
Example #51
		public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
		{
			return Get(si.dir, si, sis, closeDir, true);
		}
Example #52
 public FlushFailedEvent(SegmentInfo info)
 {
     this.info = info;
 }
Example #53
		private void  Initialize(SegmentInfo si)
		{
			segment = si.name;
			this.si = si;
			
			bool success = false;
			
			try
			{
				// Use compound file directory for some files, if it exists
				Directory cfsDir = Directory();
				if (si.GetUseCompoundFile())
				{
					cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
					cfsDir = cfsReader;
				}
				
				// No compound file exists - use the multi-file format
				fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
				fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
				
				// Verify two sources of "maxDoc" agree:
				if (fieldsReader.Size() != si.docCount)
				{
					throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
				}
				
				tis = new TermInfosReader(cfsDir, segment, fieldInfos);
				
				// NOTE: the bitvector is stored using the regular directory, not cfs
				if (HasDeletions(si))
				{
					deletedDocs = new BitVector(Directory(), si.GetDelFileName());
					
					// Verify # deletes does not exceed maxDoc for this segment:
					if (deletedDocs.Count() > MaxDoc())
					{
						throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
					}
				}
				
				// make sure that all index files have been read or are kept open
				// so that if an index update removes them we'll still have them
				freqStream = cfsDir.OpenInput(segment + ".frq");
				proxStream = cfsDir.OpenInput(segment + ".prx");
				OpenNorms(cfsDir);
				
				if (fieldInfos.HasVectors())
				{
					// open term vector files only as needed
					termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
				}
				success = true;
			}
			finally
			{
				
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above.  In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					DoClose();
				}
			}
		}
Example #54
 /// <summary> Returns true if a newly flushed (not from merge)
 /// segment should use the compound file format.
 /// </summary>
 public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
Example #55
		internal static bool UsesCompoundFile(SegmentInfo si)
		{
			return si.GetUseCompoundFile();
		}
Example #56
 // Javadoc inherited
 public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
 {
     return(useCompoundFile);
 }
Example #57
		internal virtual void  SetSegmentInfo(SegmentInfo info)
		{
			si = info;
		}
Example #58
 abstract protected internal long Size(SegmentInfo info);
Example #59
		/// <summary> Returns true if a newly flushed (not from merge)
		/// segment should use the compound file format.
		/// </summary>
		public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
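Concrete policies decide this per segment. As an illustrative sketch only (the threshold and the policy are assumptions, not taken from any merge policy in this code), an override might restrict the compound format to small flushed segments:

		// Illustrative override sketch: compound format only for small segments.
		// The docCount threshold is an assumption, purely for illustration.
		public override bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment)
		{
			return newSegment.docCount <= 10000;
		}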
Example #60
        /// <summary>Returns the merges necessary to optimize the index.
        /// This merge policy defines "optimized" to mean only one
        /// segment in the index, where that segment has no
        /// deletions pending nor separate norms, and it is in
        /// compound file format if the current useCompoundFile
        /// setting is true.  This method returns multiple merges
        /// (mergeFactor at a time) so the {@link MergeScheduler}
        /// in use may make use of concurrency.
        /// </summary>
        public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            MergeSpecification spec;

            System.Diagnostics.Debug.Assert(maxNumSegments > 0);

            if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
            {
                // Find the newest (rightmost) segment that needs to
                // be optimized (other segments may have been flushed
                // since optimize started):
                int last = infos.Count;
                while (last > 0)
                {
                    SegmentInfo info = infos.Info(--last);
                    if (segmentsToOptimize.Contains(info))
                    {
                        last++;
                        break;
                    }
                }

                if (last > 0)
                {
                    spec = new MergeSpecification();

                    // First, enroll all "full" merges (size
                    // mergeFactor) to potentially be run concurrently:
                    while (last - maxNumSegments + 1 >= mergeFactor)
                    {
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
                        last -= mergeFactor;
                    }

                    // Only if there are no full merges pending do we
                    // add a final partial (< mergeFactor segments) merge:
                    if (0 == spec.merges.Count)
                    {
                        if (maxNumSegments == 1)
                        {
                            // Since we must optimize down to 1 segment, the
                            // choice is simple:
                            if (last > 1 || !IsOptimized(infos.Info(0)))
                            {
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
                            }
                        }
                        else if (last > maxNumSegments)
                        {
                            // Take care to pick a partial merge that is
                            // least cost, but does not make the index too
                            // lopsided.  If we always just picked the
                            // partial tail then we could produce a highly
                            // lopsided index over time:

                            // We must merge this many segments to leave
                            // maxNumSegments in the index (from when
                            // optimize was first kicked off):
                            int finalMergeSize = last - maxNumSegments + 1;

                            // Consider all possible starting points:
                            long bestSize  = 0;
                            int  bestStart = 0;

                            for (int i = 0; i < last - finalMergeSize + 1; i++)
                            {
                                long sumSize = 0;
                                for (int j = 0; j < finalMergeSize; j++)
                                {
                                    sumSize += Size(infos.Info(j + i));
                                }
                                if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                                {
                                    bestStart = i;
                                    bestSize  = sumSize;
                                }
                            }

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
                        }
                    }
                }
                else
                {
                    spec = null;
                }
            }
            else
            {
                spec = null;
            }

            return(spec);
        }
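The partial-merge branch above is a sliding-window search over segment sizes: take the cheapest window of finalMergeSize consecutive segments, but reject any window whose total is at least twice the size of the segment immediately to its left, since merging it would leave the index lopsided. A standalone, hypothetical distillation of that selection over plain arrays (no Lucene types):

        // Hypothetical, self-contained version of the window selection used above.
        private static int FindBestMergeStart(long[] segmentSizes, int finalMergeSize)
        {
            long bestSize = 0;
            int bestStart = 0;

            for (int i = 0; i + finalMergeSize <= segmentSizes.Length; i++)
            {
                long sumSize = 0;
                for (int j = 0; j < finalMergeSize; j++)
                {
                    sumSize += segmentSizes[i + j];
                }

                // Keep the cheapest window seen so far, but never one that is at least
                // twice the size of the segment to its left (that would be lopsided).
                if (i == 0 || (sumSize < 2 * segmentSizes[i - 1] && sumSize < bestSize))
                {
                    bestStart = i;
                    bestSize = sumSize;
                }
            }

            return bestStart;
        }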