Example #1
        private void  AssertSameSeekBehavior(System.String msg, IndexInput expected, IndexInput actual)
        {
            // seek to 0
            long point = 0;

            AssertSameStreams(msg + ", seek(0)", expected, actual, point);

            // seek to middle
            point = expected.Length() / 2L;
            AssertSameStreams(msg + ", seek(mid)", expected, actual, point);

            // seek to end - 2
            point = expected.Length() - 2;
            AssertSameStreams(msg + ", seek(end-2)", expected, actual, point);

            // seek to end - 1
            point = expected.Length() - 1;
            AssertSameStreams(msg + ", seek(end-1)", expected, actual, point);

            // seek to the end
            point = expected.Length();
            AssertSameStreams(msg + ", seek(end)", expected, actual, point);

            // seek past end
            point = expected.Length() + 1;
            AssertSameStreams(msg + ", seek(end+1)", expected, actual, point);
        }
        /// <summary>Returns the length in bytes of each raw document in a
        /// contiguous range of length numDocs starting with
        /// startDocID.  Returns the IndexInput (the fieldStream),
        /// already seeked to the starting point for startDocID.
        /// </summary>
        internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
        {
            SeekIndex(startDocID);
            long startOffset = indexStream.ReadLong();
            long lastOffset  = startOffset;
            int  count       = 0;

            while (count < numDocs)
            {
                long offset;
                int  docID = docStoreOffset + startDocID + count + 1;
                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
                if (docID < numTotalDocs)
                {
                    offset = indexStream.ReadLong();
                }
                else
                {
                    offset = fieldsStream.Length();
                }
                lengths[count++] = (int)(offset - lastOffset);
                lastOffset       = offset;
            }

            fieldsStream.Seek(startOffset);

            return(fieldsStream);
        }
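
The lengths come out of a simple differencing of consecutive .fdx pointers: each document's byte length is the gap between its offset and the next one, and the last document in the segment uses the end of the .fdt file instead. A minimal standalone sketch of that step (the offsets and the 500-byte .fdt length below are made up, not taken from any example on this page):

    // Offset-difference step performed by RawDocs above, in isolation.
    long[] offsets = { 0L, 120L, 300L, 340L };   // per-document pointers into .fdt
    long fdtLength = 500L;                       // total length of the .fdt file
    int[] lengths  = new int[offsets.Length];
    for (int i = 0; i < offsets.Length; i++)
    {
        // next pointer, or end-of-file for the last document in the segment
        long next  = (i + 1 < offsets.Length) ? offsets[i + 1] : fdtLength;
        lengths[i] = (int)(next - offsets[i]);   // 120, 180, 40, 160
    }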
Example #3
        /// <summary>Retrieve the length (in bytes) of the tvd and tvf
        /// entries for the next numDocs starting with
        /// startDocID.  This is used for bulk copying when
        /// merging segments, if the field numbers are
        /// congruent.  Once this returns, the tvf &amp; tvd streams
        /// are seeked to the startDocID.
        /// </summary>
        internal void  RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (tvx == null)
            {
                for (int i = 0; i < tvdLengths.Length; i++)
                {
                    tvdLengths[i] = 0;
                }
                for (int i = 0; i < tvfLengths.Length; i++)
                {
                    tvfLengths[i] = 0;
                }
                return;
            }

            // SegmentMerger calls canReadRawDocs() first and should
            // not call us if that returns false.
            if (format < FORMAT_VERSION2)
            {
                throw new System.SystemException("cannot read raw docs with older term vector formats");
            }

            SeekTvx(startDocID);

            long tvdPosition = tvx.ReadLong();

            tvd.Seek(tvdPosition);

            long tvfPosition = tvx.ReadLong();

            tvf.Seek(tvfPosition);

            long lastTvdPosition = tvdPosition;
            long lastTvfPosition = tvfPosition;

            int count = 0;

            while (count < numDocs)
            {
                int docID = docStoreOffset + startDocID + count + 1;
                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
                if (docID < numTotalDocs)
                {
                    tvdPosition = tvx.ReadLong();
                    tvfPosition = tvx.ReadLong();
                }
                else
                {
                    tvdPosition = tvd.Length();
                    tvfPosition = tvf.Length();
                    System.Diagnostics.Debug.Assert(count == numDocs - 1);
                }
                tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
                tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
                count++;
                lastTvdPosition = tvdPosition;
                lastTvfPosition = tvfPosition;
            }
        }
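
SeekTvx itself is not shown in any snippet on this page. Under the layout these readers imply for FORMAT_VERSION2 (a small FORMAT_SIZE header followed by two 8-byte pointers per document, tvd first and tvf second, as the constructors further down assert with their (tvx.Length() - FORMAT_SIZE) % 16 == 0 checks), a plausible sketch of it would be:

    // Sketch only, not copied from the example source: position tvx at the
    // two-pointer entry for docNum, accounting for the shared-file offset.
    private void SeekTvx(int docNum)
    {
        tvx.Seek(FORMAT_SIZE + (docNum + docStoreOffset) * 16L);
    }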
Example #4
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            try
            {
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    CheckValidFormat(tvx);
                    tvd       = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    tvdFormat = CheckValidFormat(tvd);
                    tvf       = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    tvfFormat = CheckValidFormat(tvf);
                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size           = (int)(tvx.Length() >> 3);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size           = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(((int)(tvx.Length() / 8)) >= size + docStoreOffset);
                    }
                }

                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
        /*internal*/
        public FieldsReader(Directory d, System.String segment, FieldInfos fn)
        {
            fieldInfos = fn;

            fieldsStream = d.OpenInput(segment + ".fdt");
            indexStream = d.OpenInput(segment + ".fdx");

            size = (int) (indexStream.Length() / 8);
        }
Example #7
        public virtual void  TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileReader cr         = new CompoundFileReader(dir, "f.comp", null);
            IndexInput         is_Renamed = cr.OpenInput("f2", null);

            is_Renamed.Seek(is_Renamed.Length(null) - 10, null);
            byte[] b = new byte[100];
            is_Renamed.ReadBytes(b, 0, 10, null);

            Assert.Throws<System.IO.IOException>(() => is_Renamed.ReadByte(null), "Single byte read past end of file");

            is_Renamed.Seek(is_Renamed.Length(null) - 10, null);
            Assert.Throws<System.IO.IOException>(() => is_Renamed.ReadBytes(b, 0, 50, null), "Block read past end of file");

            is_Renamed.Close();
            cr.Close();
        }
Example #8
        public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn)
        {
            fieldInfos = fn;

            fieldsStream = d.OpenInput(segment + ".fdt");
            indexStream  = d.OpenInput(segment + ".fdx");

            size = (int)(indexStream.Length() / 8);
        }
Example #9
        public FieldsReader(Directory d, System.String segment, FieldInfos fn)
        {
            fieldInfos = fn;

            cloneableFieldsStream = d.OpenInput(segment + ".fdt");
            fieldsStream          = (IndexInput)cloneableFieldsStream.Clone();
            indexStream           = d.OpenInput(segment + ".fdx");
            size = (int)(indexStream.Length() / 8);
        }
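
Every variant of this constructor derives size the same way, because the .fdx index stores exactly one 8-byte long (a pointer into .fdt) per document; later formats, such as Example #22 below, first subtract a small format header before dividing. A worked illustration with made-up numbers:

    // Illustrative numbers only: a 4096-byte .fdx file with no header
    // describes 4096 / 8 = 512 documents, and document N's pointer into
    // .fdt sits at byte position N * 8 of the .fdx file.
    long fdxLength       = 4096;                   // e.g. indexStream.Length()
    int  docCount        = (int)(fdxLength / 8);   // 512
    int  docID           = 42;
    long pointerPosition = docID * 8L;             // 336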
Example #10
 private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput actual, long seekTo)
 {
     if (seekTo >= 0 && seekTo < expected.Length())
     {
         expected.Seek(seekTo);
         actual.Seek(seekTo);
         AssertSameStreams(msg + ", seek(mid)", expected, actual);
     }
 }
Example #11
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize);
                fieldsStream          = (IndexInput)cloneableFieldsStream.Clone();
                indexStream           = d.OpenInput(segment + ".fdx", readBufferSize);

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size           = (int)(indexStream.Length() >> 3);
                }

                numTotalDocs = (int)(indexStream.Length() >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
Example #12
        public CompoundFileReader(Directory dir, string name, int readBufferSize)
        {
            directory           = dir;
            fileName            = name;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                stream = dir.OpenInput(name, readBufferSize);

                // read the directory and init files
                int       count = stream.ReadVInt();
                FileEntry entry = null;
                for (int i = 0; i < count; i++)
                {
                    long   offset = stream.ReadLong();
                    string id     = stream.ReadString();

                    if (entry != null)
                    {
                        // set length of the previous entry
                        entry.length = offset - entry.offset;
                    }

                    entry = new FileEntry {
                        offset = offset
                    };
                    entries[id] = entry;
                }

                // set the length of the final entry
                if (entry != null)
                {
                    entry.length = stream.Length() - entry.offset;
                }

                success = true;
            }
            finally
            {
                if (!success && (stream != null))
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
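
Once constructed, the reader hands out per-entry IndexInputs whose Length() reflects only that sub-file, which is what the read-past-EOF tests elsewhere on this page (Examples #7 and #17) rely on. A hypothetical usage sketch, with made-up names and an already-open Directory assumed in dir:

    // Hypothetical usage; "_1.cfs" and "_1.fdt" are made-up names.
    CompoundFileReader cfr = new CompoundFileReader(dir, "_1.cfs", 1024);
    IndexInput sub  = cfr.OpenInput("_1.fdt");
    long subLength  = sub.Length();                // length of the sub-file only
    byte[] buffer   = new byte[(int)System.Math.Min(subLength, 1024L)];
    sub.ReadBytes(buffer, 0, buffer.Length);
    sub.Close();
    cfr.Close();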
Example #13
        private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput test)
        {
            Assert.IsNotNull(expected, msg + " null expected");
            Assert.IsNotNull(test, msg + " null test");
            Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
            Assert.AreEqual(expected.GetFilePointer(), test.GetFilePointer(), msg + " position");

            byte[] expectedBuffer = new byte[512];
            byte[] testBuffer     = new byte[expectedBuffer.Length];

            long remainder = expected.Length() - expected.GetFilePointer();

            while (remainder > 0)
            {
                int readLen = (int)System.Math.Min(remainder, expectedBuffer.Length);
                expected.ReadBytes(expectedBuffer, 0, readLen);
                test.ReadBytes(testBuffer, 0, readLen);
                AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
                remainder -= readLen;
            }
        }
Example #14
        private void  Read(IndexInput input, System.String fileName)
        {
            int firstInt = input.ReadVInt();

            if (firstInt < 0)
            {
                // This is a real format
                format = firstInt;
            }
            else
            {
                format = FORMAT_PRE;
            }

            if (format != FORMAT_PRE & format != FORMAT_START)
            {
                throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
            }

            int size;

            if (format == FORMAT_PRE)
            {
                size = firstInt;
            }
            else
            {
                size = input.ReadVInt();                 //read in the size
            }

            for (int i = 0; i < size; i++)
            {
                System.String name            = StringHelper.Intern(input.ReadString());
                byte          bits            = input.ReadByte();
                bool          isIndexed       = (bits & IS_INDEXED) != 0;
                bool          storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                bool          storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                bool          storeOffsetWithTermVector    = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                bool          omitNorms                = (bits & OMIT_NORMS) != 0;
                bool          storePayloads            = (bits & STORE_PAYLOADS) != 0;
                bool          omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

                AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            }

            if (input.GetFilePointer() != input.Length())
            {
                throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
            }
        }
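
Each flag here is a single-bit mask packed into the one byte written per field. The actual constant values live in FieldInfos and are not shown on this page, so the values below are assumed purely to illustrate how a stored bits byte decodes:

    // Assumed mask values, for illustration only (the real constants are
    // defined in FieldInfos and are not part of the snippets on this page).
    const byte IS_INDEXED = 0x1;
    const byte OMIT_NORMS = 0x10;
    byte bits = 0x11;                            // as read by input.ReadByte()
    bool isIndexed = (bits & IS_INDEXED) != 0;   // true
    bool omitNorms = (bits & OMIT_NORMS) != 0;   // true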
Example #15
        public void  Read(Directory directory)
        {
            IndexInput input = directory.OpenInput(IndexFileNames.SEGMENTS);

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < FORMAT)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory);
                    Add(si);
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
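
A note on the magic number in the fallback branch: 621355968000000000 is the tick count of 1970-01-01 in .NET's 100-nanosecond units, and there are 10,000 ticks per millisecond, so the expression is simply the current time in milliseconds since the Unix epoch, used as an ever-increasing stand-in version when the file carries none:

    // Reproduces the expression used above.
    long unixEpochTicks   = new System.DateTime(1970, 1, 1).Ticks;  // 621355968000000000
    long millisSinceEpoch = (System.DateTime.Now.Ticks - unixEpochTicks) / 10000;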
Example #16
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput is_Renamed = null;

            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenInput(source.file);
                long length    = is_Renamed.Length();
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int)System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (is_Renamed != null)
                {
                    is_Renamed.Close();
                }
            }
        }
Example #17
        public virtual void  TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileReader cr         = new CompoundFileReader(dir, "f.comp");
            IndexInput         is_Renamed = cr.OpenInput("f2");

            is_Renamed.Seek(is_Renamed.Length() - 10);
            byte[] b = new byte[100];
            is_Renamed.ReadBytes(b, 0, 10);

            try
            {
                byte test = is_Renamed.ReadByte();
                Assert.Fail("Single byte read past end of file");
            }
            catch (System.IO.IOException e)
            {
                /* success */
                //System.out.println("SUCCESS: single byte read past end of file: " + e);
            }

            is_Renamed.Seek(is_Renamed.Length() - 10);
            try
            {
                is_Renamed.ReadBytes(b, 0, 50);
                Assert.Fail("Block read past end of file");
            }
            catch (System.IO.IOException e)
            {
                /* success */
                //System.out.println("SUCCESS: block read past end of file: " + e);
            }

            is_Renamed.Close();
            cr.Close();
        }
Example #18
        public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
        {
            if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
            {
                tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION);
                CheckValidFormat(tvx);
                tvd       = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION);
                tvdFormat = CheckValidFormat(tvd);
                tvf       = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION);
                tvfFormat = CheckValidFormat(tvf);
                size      = (int)tvx.Length() / 8;
            }

            this.fieldInfos = fieldInfos;
        }
Example #19
        /*internal*/
        public TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
        {
            if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
            {
                tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION);
                CheckValidFormat(tvx);
                tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION);
                tvdFormat = CheckValidFormat(tvd);
                tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION);
                tvfFormat = CheckValidFormat(tvf);
                size = (int) tvx.Length() / 8;
            }

            this.fieldInfos = fieldInfos;
        }
Example #20
        public virtual void  CopyFile(Directory dir, System.String src, System.String dest)
        {
            IndexInput  in_Renamed  = dir.OpenInput(src);
            IndexOutput out_Renamed = dir.CreateOutput(dest);

            byte[] b         = new byte[1024];
            long   remainder = in_Renamed.Length();

            while (remainder > 0)
            {
                int len = (int)System.Math.Min(b.Length, remainder);
                in_Renamed.ReadBytes(b, 0, len);
                out_Renamed.WriteBytes(b, len);
                remainder -= len;
            }
            in_Renamed.Close();
            out_Renamed.Close();
        }
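
A hypothetical way this helper might be exercised, with made-up file names, assuming it is called from within the same class and that something has already written source.dat into the directory:

    // Sketch only: duplicate a file inside a RAMDirectory and check that
    // the copy is the same length as the original.
    Directory dir = new RAMDirectory();
    // ... write "source.dat" into dir ...
    CopyFile(dir, "source.dat", "source.bak");
    IndexInput a = dir.OpenInput("source.dat");
    IndexInput b = dir.OpenInput("source.bak");
    System.Diagnostics.Debug.Assert(a.Length() == b.Length());
    a.Close();
    b.Close();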
        /// <summary>
        /// Sole constructor. </summary>
        public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment = si.Name;
            bool success = false;
            FieldInfos = fn;
            try
            {
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
                CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer);
                Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer);
                long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX;
                this.Size_Renamed = (int)(indexSize >> 3);
                // Verify two sources of "maxDoc" agree:
                if (this.Size_Renamed != si.DocCount)
                {
                    throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount);
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // ensure we throw our original exception
                    catch (Exception)
                    {
                    }
                }
            }
        }
Example #22
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;
            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case
                int firstInt = cloneableIndexStream.ReadInt();
                format = firstInt == 0 ? 0 : firstInt;

                if (format > FieldsWriter.FORMAT_CURRENT)
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");

                formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();

                fieldsStream = (IndexInput) cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != - 1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size = (int) (indexSize >> 3);
                }

                indexStream = (IndexInput) cloneableIndexStream.Clone();
                numTotalDocs = (int) (indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
Example #23
		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
		{
			bool success = false;
			
			try
			{
				fieldInfos = fn;
				
				cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize);
				fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
				indexStream = d.OpenInput(segment + ".fdx", readBufferSize);
				
				if (docStoreOffset != - 1)
				{
					// We read only a slice out of this shared fields file
					this.docStoreOffset = docStoreOffset;
					this.size = size;
					
					// Verify the file is long enough to hold all of our
					// docs
					System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset);
				}
				else
				{
					this.docStoreOffset = 0;
					this.size = (int) (indexStream.Length() >> 3);
				}
				
				numTotalDocs = (int) (indexStream.Length() >> 3);
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Close();
				}
			}
		}
Example #24
        /// <summary> Read a particular segmentFileName.  Note that this may
        /// throw an IOException if a commit is in process.
        ///
        /// </summary>
        /// <param name="directory">-- directory containing the segments file
        /// </param>
        /// <param name="segmentFileName">-- segment file to load
        /// </param>
        public void  Read(Directory directory, System.String segmentFileName)
        {
            bool success = false;

            IndexInput input = directory.OpenInput(segmentFileName);

            if (segmentFileName.Equals(IndexFileNames.SEGMENTS))
            {
                generation = 0;
            }
            else
            {
#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
                generation = Lucene.Net.Documents.NumberTools.ToLong(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length));
#else
                generation = System.Convert.ToInt64(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length), 16);
#endif
            }
            lastGeneration = generation;

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < FORMAT_SINGLE_NORM_FILE)
                    {
                        throw new System.IO.IOException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    Add(new SegmentInfo(directory, format, input));
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }
                success = true;
            }
            finally
            {
                input.Close();
                if (!success)
                {
                    // Clear any segment infos we had loaded so we
                    // have a clean slate on retry:
                    Clear();
                }
            }
        }
Example #25
 private void  AssertSameSeekBehavior(System.String msg, IndexInput expected, IndexInput actual)
 {
     // seek to 0
     long point = 0;
     AssertSameStreams(msg + ", seek(0)", expected, actual, point);
     
     // seek to middle
     point = expected.Length() / 2L;
     AssertSameStreams(msg + ", seek(mid)", expected, actual, point);
     
     // seek to end - 2
     point = expected.Length() - 2;
     AssertSameStreams(msg + ", seek(end-2)", expected, actual, point);
     
     // seek to end - 1
     point = expected.Length() - 1;
     AssertSameStreams(msg + ", seek(end-1)", expected, actual, point);
     
     // seek to the end
     point = expected.Length();
     AssertSameStreams(msg + ", seek(end)", expected, actual, point);
     
     // seek past end
     point = expected.Length() + 1;
     AssertSameStreams(msg + ", seek(end+1)", expected, actual, point);
 }
Example #26
		private void  Read(IndexInput input, System.String fileName)
		{
			int firstInt = input.ReadVInt();
			
			if (firstInt < 0)
			{
				// This is a real format
				format = firstInt;
			}
			else
			{
				format = FORMAT_PRE;
			}
			
			if (format != FORMAT_PRE & format != FORMAT_START)
			{
				throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
			}
			
			int size;
			if (format == FORMAT_PRE)
			{
				size = firstInt;
			}
			else
			{
				size = input.ReadVInt(); //read in the size
			}
			
			for (int i = 0; i < size; i++)
			{
				System.String name = StringHelper.Intern(input.ReadString());
				byte bits = input.ReadByte();
				bool isIndexed = (bits & IS_INDEXED) != 0;
				bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
				bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
				bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
				bool omitNorms = (bits & OMIT_NORMS) != 0;
				bool storePayloads = (bits & STORE_PAYLOADS) != 0;
				bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
				
				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			}
			
			if (input.GetFilePointer() != input.Length())
			{
				throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
			}
		}
Example #27
 private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput test)
 {
     Assert.IsNotNull(expected, msg + " null expected");
     Assert.IsNotNull(test, msg + " null test");
     Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
     Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");
     
     byte[] expectedBuffer = new byte[512];
     byte[] testBuffer = new byte[expectedBuffer.Length];
     
     long remainder = expected.Length() - expected.FilePointer;
     while (remainder > 0)
     {
         int readLen = (int) System.Math.Min(remainder, expectedBuffer.Length);
         expected.ReadBytes(expectedBuffer, 0, readLen);
         test.ReadBytes(testBuffer, 0, readLen);
         AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
         remainder -= readLen;
     }
 }
Example #28
 private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput actual, long seekTo)
 {
     if (seekTo >= 0 && seekTo < expected.Length())
     {
         expected.Seek(seekTo);
         actual.Seek(seekTo);
         AssertSameStreams(msg + ", seek(mid)", expected, actual);
     }
 }
Example #29
		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
		{
			bool success = false;
			
			try
			{
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

                    if (format >= FORMAT_VERSION2)
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }

				
				this.fieldInfos = fieldInfos;
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Close();
				}
			}
		}
Example #30
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            try
            {
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

                    if (format >= FORMAT_VERSION2)
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size           = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size           = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }


                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
Example #31
 public override long Length()
 {
     return(delegate_Renamed.Length());
 }
Example #32
		public CompoundFileReader(Directory dir, System.String name)
		{
			directory = dir;
			fileName = name;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenInput(name);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}
					
					entry = new FileEntry();
					entry.offset = offset;
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException)
					{
					}
				}
			}
		}
Example #33
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream  = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case
                int firstInt = cloneableIndexStream.ReadInt();
                format = firstInt == 0 ? 0 : firstInt;

                if (format > FieldsWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
                }

                formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                {
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();
                }

                fieldsStream = (IndexInput)cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size           = (int)(indexSize >> 3);
                }

                indexStream  = (IndexInput)cloneableIndexStream.Clone();
                numTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
Example #34
        /// <summary> Read a particular segmentFileName.  Note that this may
        /// throw an IOException if a commit is in process.
        ///
        /// </summary>
        /// <param name="directory">-- directory containing the segments file
        /// </param>
        /// <param name="segmentFileName">-- segment file to load
        /// </param>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public void  Read(Directory directory, System.String segmentFileName)
        {
            bool success = false;

            // Clear any previous segments:
            Clear();

            IndexInput input = directory.OpenInput(segmentFileName);

            generation = GenerationFromSegmentsFileName(segmentFileName);

            lastGeneration = generation;

            try
            {
                int format = input.ReadInt();
                if (format < 0)
                {
                    // file contains explicit format info
                    // check that it is a format we can understand
                    if (format < CURRENT_FORMAT)
                    {
                        throw new CorruptIndexException("Unknown format version: " + format);
                    }
                    version = input.ReadLong();                    // read version
                    counter = input.ReadInt();                     // read counter
                }
                else
                {
                    // file is in old format without explicit format info
                    counter = format;
                }

                for (int i = input.ReadInt(); i > 0; i--)
                {
                    // read segmentInfos
                    Add(new SegmentInfo(directory, format, input));
                }

                if (format >= 0)
                {
                    // in old format the version number may be at the end of the file
                    if (input.GetFilePointer() >= input.Length())
                    {
                        version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
                    }
                    // old file format without version number
                    else
                    {
                        version = input.ReadLong();                         // read version
                    }
                }
                success = true;
            }
            finally
            {
                input.Close();
                if (!success)
                {
                    // Clear any segment infos we had loaded so we
                    // have a clean slate on retry:
                    Clear();
                }
            }
        }
Example #35
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            try
            {
                int format = input.ReadVInt();

                if (format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }
                if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW)
                {
                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }

                int         size  = input.ReadVInt(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name            = input.ReadString();
                    int    fieldNumber     = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt() : i;
                    byte   bits            = input.ReadByte();
                    bool   isIndexed       = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
                    bool   storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
                    bool   omitNorms       = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
                    bool   storePayloads   = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0;
                    FieldInfo.IndexOptions? indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = null;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0)
                    {
                        if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS)
                        {
                            indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                        }
                        else
                        {
                            throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                        }
                    }
                    else
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                    {
                        storePayloads = false;
                    }

                    DocValuesType_e? normType = isIndexed && !omitNorms ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;
                    if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null)
                    {
                        // RW can have norms but doesn't write them
                        normType = input.ReadByte() != 0 ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;
                    }

                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
                }

                if (input.FilePointer != input.Length())
                {
                    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
                }
                return(new FieldInfos(infos));
            }
            finally
            {
                input.Dispose();
            }
        }