Пример #1
0
 /// <summary> Construct a new SegmentInfo instance by reading a
 /// previously saved SegmentInfo from input.
 ///
 /// </summary>
 /// <param name="dir">directory to load from
 /// </param>
 /// <param name="format">format of the segments info file
 /// </param>
 /// <param name="input">input handle to read segment info from
 /// </param>
 internal SegmentInfo(Directory dir, int format, IndexInput input)
 {
     this.dir = dir;
     // Values are read in exactly the order they were written; do not reorder these calls.
     name     = input.ReadString();
     docCount = input.ReadInt();
     // NOTE(review): the <= checks suggest format constants decrease as the file
     // format evolves (newer formats are numerically smaller) — confirm in SegmentInfos.
     if (format <= SegmentInfos.FORMAT_LOCKLESS)
     {
         delGen = input.ReadLong();
         if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
         {
             docStoreOffset = input.ReadInt();
             // -1 means this segment has no shared doc store; the extra
             // doc-store fields are then absent from the stream.
             if (docStoreOffset != -1)
             {
                 docStoreSegment        = input.ReadString();
                 docStoreIsCompoundFile = (1 == input.ReadByte());
             }
             else
             {
                 docStoreSegment        = name;
                 docStoreIsCompoundFile = false;
             }
         }
         else
         {
             // Format predates shared doc stores; fall back to defaults.
             docStoreOffset         = -1;
             docStoreSegment        = name;
             docStoreIsCompoundFile = false;
         }
         if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
         {
             hasSingleNormFile = (1 == input.ReadByte());
         }
         else
         {
             hasSingleNormFile = false;
         }
         // Per-field norm generations; NO marks "none recorded".
         int numNormGen = input.ReadInt();
         if (numNormGen == NO)
         {
             normGen = null;
         }
         else
         {
             normGen = new long[numNormGen];
             for (int j = 0; j < numNormGen; j++)
             {
                 normGen[j] = input.ReadLong();
             }
         }
         isCompoundFile = (sbyte)input.ReadByte();
         // The CHECK_DIR sentinel in this byte marks a segment written by a pre-lockless release.
         preLockless    = (isCompoundFile == CHECK_DIR);
         if (format <= SegmentInfos.FORMAT_DEL_COUNT)
         {
             delCount = input.ReadInt();
             System.Diagnostics.Debug.Assert(delCount <= docCount);
         }
         else
         {
             // -1 = deletion count not stored in this format.
             delCount = -1;
         }
         if (format <= SegmentInfos.FORMAT_HAS_PROX)
         {
             hasProx = input.ReadByte() == 1;
         }
         else
         {
             // Default when the format carries no hasProx flag.
             hasProx = true;
         }
     }
     else
     {
         // Pre-lockless segment: nothing further stored on disk; initialize
         // sentinel defaults (CHECK_DIR / -1) for later resolution.
         delGen                 = CHECK_DIR;
         normGen                = null;
         isCompoundFile         = (sbyte)(CHECK_DIR);
         preLockless            = true;
         hasSingleNormFile      = false;
         docStoreOffset         = -1;
         docStoreIsCompoundFile = false;
         docStoreSegment        = null;
         delCount               = -1;
         hasProx                = true;
     }
 }
Пример #2
0
        /// <summary> Loads the stored fields of document <c>n</c> from the fields files
        /// and returns them as a freshly constructed <c>Document</c>.
        /// </summary>
        /// <param name="n">document number within this segment</param>
        public /*internal*/ Document Doc(int n)
        {
            // The index stream holds one 8-byte pointer per document into the fields stream.
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int       fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi          = fieldInfos.FieldInfo(fieldNumber);

                // One flag byte per field: compressed / tokenized / binary.
                byte bits = fieldsStream.ReadByte();

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

                if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    // Binary value: length-prefixed byte block.
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    if (compressed)
                    {
                        doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                    }
                    else
                    {
                        doc.Add(new Field(fi.name, b, Field.Store.YES));
                    }
                }
                else
                {
                    Field.Index index;
                    Field.Store store = Field.Store.YES;

                    // Reconstruct the indexing mode from the field's schema flags.
                    if (fi.isIndexed && tokenize)
                    {
                        index = Field.Index.TOKENIZED;
                    }
                    else if (fi.isIndexed && !tokenize)
                    {
                        index = Field.Index.UN_TOKENIZED;
                    }
                    else
                    {
                        index = Field.Index.NO;
                    }

                    // Reconstruct the term-vector mode (positions/offsets) the same way.
                    Field.TermVector termVector = null;
                    if (fi.storeTermVector)
                    {
                        if (fi.storeOffsetWithTermVector)
                        {
                            if (fi.storePositionWithTermVector)
                            {
                                termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                            }
                            else
                            {
                                termVector = Field.TermVector.WITH_OFFSETS;
                            }
                        }
                        else if (fi.storePositionWithTermVector)
                        {
                            termVector = Field.TermVector.WITH_POSITIONS;
                        }
                        else
                        {
                            termVector = Field.TermVector.YES;
                        }
                    }
                    else
                    {
                        termVector = Field.TermVector.NO;
                    }

                    if (compressed)
                    {
                        store = Field.Store.COMPRESS;
                        byte[] b = new byte[fieldsStream.ReadVInt()];
                        fieldsStream.ReadBytes(b, 0, b.Length);
                        // Compressed text is stored as bytes; inflate then decode as UTF-8.
                        Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                    else
                    {
                        Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                }
            }

            return(doc);
        }
Пример #3
0
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		internal SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			// Values are read in exactly the order they were written; do not reorder these calls.
			name = input.ReadString();
			docCount = input.ReadInt();
			// NOTE(review): the <= checks suggest format constants decrease as the file
			// format evolves (newer formats are numerically smaller) — confirm in SegmentInfos.
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
				{
					docStoreOffset = input.ReadInt();
					// -1 means no shared doc store; the extra fields are then absent.
					if (docStoreOffset != - 1)
					{
						docStoreSegment = input.ReadString();
						docStoreIsCompoundFile = (1 == input.ReadByte());
					}
					else
					{
						docStoreSegment = name;
						docStoreIsCompoundFile = false;
					}
				}
				else
				{
					// Format predates shared doc stores; fall back to defaults.
					docStoreOffset = - 1;
					docStoreSegment = name;
					docStoreIsCompoundFile = false;
				}
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				// Per-field norm generations; NO marks "none recorded".
				int numNormGen = input.ReadInt();
				if (numNormGen == NO)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				// The CHECK_DIR sentinel in this byte marks a pre-lockless segment.
				preLockless = (isCompoundFile == CHECK_DIR);
				if (format <= SegmentInfos.FORMAT_DEL_COUNT)
				{
					delCount = input.ReadInt();
					System.Diagnostics.Debug.Assert(delCount <= docCount);
				}
				else
					delCount = - 1;
				if (format <= SegmentInfos.FORMAT_HAS_PROX)
					hasProx = input.ReadByte() == 1;
				else
					hasProx = true;
				
				// Diagnostics map was added later; default to empty for older formats.
				if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
				{
					diagnostics = input.ReadStringStringMap();
				}
				else
				{
					diagnostics = new System.Collections.Generic.Dictionary<string,string>();
				}
			}
			else
			{
				// Pre-lockless segment: nothing further stored; use sentinel defaults.
				delGen = CHECK_DIR;
				normGen = null;
				isCompoundFile = (sbyte) (CHECK_DIR);
				preLockless = true;
				hasSingleNormFile = false;
				docStoreOffset = - 1;
				docStoreIsCompoundFile = false;
				docStoreSegment = null;
				delCount = - 1;
				hasProx = true;
				diagnostics = new System.Collections.Generic.Dictionary<string,string>();
			}
		}
        /// <summary> Reads a serialized field-info table from <paramref name="input"/>:
        /// a VInt field count followed by (name, flags-byte) pairs, registering each
        /// field via <c>AddInternal</c>. Field names are interned.
        /// </summary>
        private void Read(IndexInput input)
        {
            int fieldCount = input.ReadVInt(); // table size precedes the entries
            for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
            {
                System.String fieldName = String.Intern(input.ReadString());
                byte flags = input.ReadByte();
                AddInternal(
                    fieldName,
                    (flags & IS_INDEXED) != 0,
                    (flags & STORE_TERMVECTOR) != 0,
                    (flags & STORE_POSITIONS_WITH_TERMVECTOR) != 0,
                    (flags & STORE_OFFSET_WITH_TERMVECTOR) != 0,
                    (flags & OMIT_NORMS) != 0);
            }
        }
Пример #5
0
        /// <summary> Reads a single field's term vector from the tvf file and feeds each
        /// term (with optional positions and offsets) into the supplied mapper.
        /// </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            // Newer formats carry a flag byte; older formats wrote a (now unused) VInt instead.
            if (format >= FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            // Terms written before FORMAT_UTF8_LENGTH_IN_BYTES are stored as Java chars,
            // later ones as UTF-8 bytes; pick the matching scratch buffer.
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                // Terms are prefix-compressed: 'start' chars/bytes are shared with the
                // previous term, followed by 'deltaLength' new ones.
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        // Grow by 1.5x, preserving the shared prefix already in the buffer.
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            // Positions are delta-encoded.
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            // Offsets are delta-encoded: start from previous end, end from start.
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
Пример #6
0
 /// <summary> Reads this bit vector in its d-gaps representation: gap-encoded
 /// indexes of the non-zero bytes, each followed by that byte's value.
 /// </summary>
 private void ReadDgaps(IndexInput input)
 {
     size = input.ReadInt(); // (re)read size
     count = input.ReadInt(); // read count
     bits = new byte[(size >> 3) + 1]; // allocate bits
     int byteIndex = 0;
     for (int remainingSetBits = Count(); remainingSetBits > 0; )
     {
         byteIndex += input.ReadVInt(); // gap to the next non-zero byte
         byte value = input.ReadByte();
         bits[byteIndex] = value;
         remainingSetBits -= BYTE_COUNTS[value & 0xFF];
     }
 }
Пример #7
0
 /// <summary>
 /// Reads this bit vector in the d-gaps representation of its CLEARED bits:
 /// start from an all-ones vector, then overwrite the bytes containing cleared bits.
 /// </summary>
 private void ReadClearedDgaps(IndexInput input)
 {
     Size_Renamed = input.ReadInt(); // (re)read size
     Count_Renamed = input.ReadInt(); // read count
     Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
     for (int byteIndex = 0; byteIndex < Bits.Length; byteIndex++)
     {
         Bits[byteIndex] = 0xff; // all bits set before applying the gaps
     }
     ClearUnusedBits();
     int position = 0;
     int remainingCleared = Size() - Count();
     while (remainingCleared > 0)
     {
         position += input.ReadVInt(); // gap to the next byte with cleared bits
         Bits[position] = input.ReadByte();
         remainingCleared -= 8 - BitUtil.BitCount(Bits[position]);
         Debug.Assert(remainingCleared >= 0 || (position == (Bits.Length - 1) && remainingCleared == -(8 - (Size_Renamed & 7))));
     }
 }
Пример #8
0
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		public SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			// Values are read in exactly the order they were written; do not reorder these calls.
			name = input.ReadString();
			docCount = input.ReadInt();
			// NOTE(review): the <= check suggests format constants decrease as the file
			// format evolves — confirm in SegmentInfos.
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				// Per-field norm generations; -1 marks "none recorded".
				int numNormGen = input.ReadInt();
				if (numNormGen == - 1)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				// A zero byte here marks a segment written by a pre-lockless release.
				preLockless = isCompoundFile == 0;
			}
			else
			{
				// Pre-lockless segment: nothing further stored; use defaults.
				delGen = 0;
				normGen = null;
				isCompoundFile = 0;
				preLockless = true;
				hasSingleNormFile = false;
			}
		}
 /// <summary> Reads per-field doc-values metadata entries from <paramref name="meta"/>,
 /// dispatching on each entry's type byte, until the -1 field-number terminator.
 /// Populates Numerics / Binaries / SortedSets (sorted data via the helper readers).
 /// </summary>
 /// <param name="meta">metadata input positioned at the first field entry</param>
 /// <param name="infos">field infos passed through to the sorted/sorted-set helpers</param>
 private void ReadFields(IndexInput meta, FieldInfos infos)
 {
     int fieldNumber = meta.ReadVInt();
     while (fieldNumber != -1)
     {
         // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
         // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
         if (fieldNumber < 0)
         {
             // Corrupt metadata is an I/O-level failure; throw IOException rather than
             // a bare System.Exception (upstream Lucene uses CorruptIndexException,
             // which is an IOException subclass).
             throw new System.IO.IOException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
         }
         byte type = meta.ReadByte();
         if (type == Lucene45DocValuesFormat.NUMERIC)
         {
             Numerics[fieldNumber] = ReadNumericEntry(meta);
         }
         else if (type == Lucene45DocValuesFormat.BINARY)
         {
             BinaryEntry b = ReadBinaryEntry(meta);
             Binaries[fieldNumber] = b;
         }
         else if (type == Lucene45DocValuesFormat.SORTED)
         {
             ReadSortedField(fieldNumber, meta, infos);
         }
         else if (type == Lucene45DocValuesFormat.SORTED_SET)
         {
             SortedSetEntry ss = ReadSortedSetEntry(meta);
             SortedSets[fieldNumber] = ss;
             if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
             {
                 ReadSortedSetFieldWithAddresses(fieldNumber, meta, infos);
             }
             else if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
             {
                 // A single-valued sorted-set is stored as a plain SORTED entry for the
                 // same field number; verify the redundant header before delegating.
                 if (meta.ReadVInt() != fieldNumber)
                 {
                     throw new System.IO.IOException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                 }
                 if (meta.ReadByte() != Lucene45DocValuesFormat.SORTED)
                 {
                     throw new System.IO.IOException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                 }
                 ReadSortedField(fieldNumber, meta, infos);
             }
             else
             {
                 // Was a message-less "throw new Exception()": always include context.
                 throw new System.IO.IOException("Invalid SORTED_SET format: " + ss.Format + " (resource=" + meta + ")");
             }
         }
         else
         {
             throw new System.IO.IOException("invalid type: " + type + ", resource=" + meta);
         }
         fieldNumber = meta.ReadVInt();
     }
 }
Пример #10
0
        /// <summary> Reads the legacy (preflex-RW) field-infos file for a segment and
        /// reconstructs a <c>FieldInfos</c> from it. The input is always disposed,
        /// even when an exception is thrown.
        /// </summary>
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            try
            {
                int format = input.ReadVInt();

                // NOTE(review): format constants appear to decrease with newer versions,
                // so '>' means "too old" and '<' means "too new" — confirm against the writer.
                if (format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }
                if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW)
                {
                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }

                int         size  = input.ReadVInt(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name            = input.ReadString();
                    // Only the preflex-RW variant persists explicit field numbers;
                    // otherwise the field number is the ordinal position.
                    int    fieldNumber     = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt() : i;
                    byte   bits            = input.ReadByte();
                    bool   isIndexed       = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
                    bool   storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
                    bool   omitNorms       = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
                    bool   storePayloads   = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0;
                    FieldInfo.IndexOptions?indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = null;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0)
                    {
                        // OMIT_POSITIONS is only legal in formats that support it.
                        if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS)
                        {
                            indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                        }
                        else
                        {
                            throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                        }
                    }
                    else
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                    {
                        storePayloads = false;
                    }

                    DocValuesType_e?normType = isIndexed && !omitNorms ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;
                    if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null)
                    {
                        // RW can have norms but doesn't write them
                        normType = input.ReadByte() != 0 ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;
                    }

                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
                }

                // Trailing bytes mean the file does not match the expected layout.
                if (input.FilePointer != input.Length())
                {
                    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
                }
                return(new FieldInfos(infos));
            }
            finally
            {
                input.Dispose();
            }
        }
Пример #11
0
		/// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		internal SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			// Values are read in exactly the order they were written; do not reorder these calls.
			name = input.ReadString();
			docCount = input.ReadInt();
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
				{
					docStoreOffset = input.ReadInt();
					// -1 means no shared doc store; the extra fields are then absent.
					if (docStoreOffset != - 1)
					{
						docStoreSegment = input.ReadString();
						docStoreIsCompoundFile = (1 == input.ReadByte());
					}
					else
					{
						docStoreSegment = name;
						docStoreIsCompoundFile = false;
					}
				}
				else
				{
					// Format predates shared doc stores; fall back to defaults.
					docStoreOffset = - 1;
					docStoreSegment = name;
					docStoreIsCompoundFile = false;
				}
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				// Per-field norm generations; NO marks "none recorded".
				int numNormGen = input.ReadInt();
				if (numNormGen == NO)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				// The CHECK_DIR sentinel in this byte marks a pre-lockless segment.
				preLockless = (isCompoundFile == CHECK_DIR);
			}
			else
			{
				// Pre-lockless segment: nothing further stored; use sentinel defaults.
				delGen = CHECK_DIR;
				normGen = null;
				isCompoundFile = (sbyte) (CHECK_DIR);
				preLockless = true;
				hasSingleNormFile = false;
				docStoreOffset = - 1;
				docStoreIsCompoundFile = false;
				docStoreSegment = null;
			}
		}
Пример #12
0
        /// <summary> Reads a single field's term vector from the tvf file and feeds each
        /// term (with optional positions and offsets) into the supplied mapper.
        /// </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <returns> The TermVector located at that position
        /// </returns>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            // The current format carries a flag byte; older formats wrote a (now unused) VInt.
            if (tvfFormat == FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            char[] buffer         = new char[10];     // init the buffer with a length of 10 character
            char[] previousBuffer = new char[] {};

            for (int i = 0; i < numTerms; i++)
            {
                // Terms are prefix-compressed: 'start' chars are shared with the
                // previous term, followed by 'deltaLength' new chars.
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                if (buffer.Length < totalLength)
                {
                    // increase buffer
                    // NOTE(review): nulling right before reallocation has no practical
                    // GC benefit, but it is behavior-neutral, so it is left as-is.
                    buffer = null;                     // give a hint to garbage collector
                    buffer = new char[totalLength];

                    if (start > 0)
                    {
                        // just copy if necessary
                        Array.Copy(previousBuffer, 0, buffer, 0, start);
                    }
                }

                tvf.ReadChars(buffer, start, deltaLength);
                System.String term = new System.String(buffer, 0, totalLength);
                previousBuffer = buffer;
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            // Positions are delta-encoded.
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            // Offsets are delta-encoded as well.
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
Пример #13
0
 /// <summary>
 /// Decodes a VAR_INTS numeric doc-values field. After the codec header, a single
 /// byte selects the storage scheme: raw fixed 64-bit values, or packed ints
 /// (minimum value, default value, then a PackedInts reader). Any other byte is
 /// treated as index corruption.
 /// </summary>
 private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
     sbyte headerByte = (sbyte)input.ReadByte();
     if (headerByte == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
     {
         // One raw 64-bit value per document (DocCount values total).
         int docCount = State.SegmentInfo.DocCount;
         long[] rawValues = new long[docCount];
         for (int docId = 0; docId < rawValues.Length; docId++)
         {
             rawValues[docId] = input.ReadLong();
         }
         RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(rawValues));
         return new NumericDocValuesAnonymousInnerClassHelper(rawValues);
     }
     if (headerByte == Lucene40DocValuesFormat.VAR_INTS_PACKED)
     {
         long minValue = input.ReadLong();
         long defaultValue = input.ReadLong();
         PackedInts.Reader reader = PackedInts.GetReader(input);
         RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed());
         return new NumericDocValuesAnonymousInnerClassHelper2(minValue, defaultValue, reader);
     }
     throw new CorruptIndexException("invalid VAR_INTS header byte: " + headerByte + " (resource=" + input + ")");
 }
Пример #14
0
 /// <summary>
 /// read as a d-gaps list: only the non-zero bytes of the bit vector are stored,
 /// each as a VInt gap from the previously stored byte's index, followed by the
 /// byte itself. Decoding stops once all counted set bits have been accounted for.
 /// </summary>
 private void ReadSetDgaps(IndexInput input)
 {
     Size_Renamed = input.ReadInt(); // (re)read size
     Count_Renamed = input.ReadInt(); // read count
     Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
     int pos = 0;
     int remaining = Count();
     while (remaining > 0)
     {
         pos += input.ReadVInt(); // gap to the next non-zero byte's index
         byte b = input.ReadByte();
         Bits[pos] = b;
         remaining -= BitUtil.BitCount(b);
         Debug.Assert(remaining >= 0);
     }
 }
Пример #15
0
        /// <summary> Reads one term vector for <paramref name="field"/> from the tvf stream.
        /// Terms are stored prefix-compressed; positions and offsets are delta-encoded VInts.
        /// </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <returns> The TermVector located at that position
        /// </returns>
        /// <throws>  IOException </throws>
        private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return(new SegmentTermVector(field, null, null));
            }

            bool storePositions;
            bool storeOffsets;

            if (tvfFormat == TermVectorsWriter.FORMAT_VERSION)
            {
                // Current format stores a flag byte describing what extras follow each term.
                byte bits = tvf.ReadByte();
                storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                // Older format: skip the unused VInt; no positions or offsets were written.
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }

            System.String[] terms     = new System.String[numTerms];
            int[]           termFreqs = new int[numTerms];

            //  we may not need these, but declare them
            int[][] positions = null;
            TermVectorOffsetInfo[][] offsets = null;
            if (storePositions)
            {
                positions = new int[numTerms][];
            }
            if (storeOffsets)
            {
                offsets = new TermVectorOffsetInfo[numTerms][];
            }

            // Prefix compression state: 'start' is the number of leading chars shared
            // with the previous term, 'deltaLength' the number of new suffix chars.
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            char[] buffer         = new char[10];     // init the buffer with a length of 10 character
            char[] previousBuffer = new char[] {};

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                if (buffer.Length < totalLength)
                {
                    // increase buffer
                    buffer = null;                     // give a hint to garbage collector
                    buffer = new char[totalLength];

                    if (start > 0)
                    {
                        // just copy if necessary
                        // carry over the shared prefix from the previous term's buffer
                        Array.Copy(previousBuffer, 0, buffer, 0, start);
                    }
                }

                // Read only the new suffix; chars [0, start) already hold the shared prefix.
                tvf.ReadChars(buffer, start, deltaLength);
                terms[i]       = new System.String(buffer, 0, totalLength);
                previousBuffer = buffer;
                int freq = tvf.ReadVInt();
                termFreqs[i] = freq;

                if (storePositions)
                {
                    //read in the positions (delta-encoded: each VInt is the gap from the previous position)
                    int[] pos = new int[freq];
                    positions[i] = pos;
                    int prevPosition = 0;
                    for (int j = 0; j < freq; j++)
                    {
                        pos[j]       = prevPosition + tvf.ReadVInt();
                        prevPosition = pos[j];
                    }
                }

                if (storeOffsets)
                {
                    // Offsets are also delta-encoded: start is relative to the previous end,
                    // end is relative to its own start.
                    TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
                    offsets[i] = offs;
                    int prevOffset = 0;
                    for (int j = 0; j < freq; j++)
                    {
                        int startOffset = prevOffset + tvf.ReadVInt();
                        int endOffset   = startOffset + tvf.ReadVInt();
                        offs[j]    = new TermVectorOffsetInfo(startOffset, endOffset);
                        prevOffset = endOffset;
                    }
                }
            }

            SegmentTermVector tv;

            // Use the position-aware subclass only when there is extra data to carry.
            if (storePositions || storeOffsets)
            {
                tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
            }
            else
            {
                tv = new SegmentTermVector(field, terms, termFreqs);
            }
            return(tv);
        }
        /// <summary>
        /// Reads the metadata for a sorted doc-values field: a BINARY entry (the term
        /// bytes) followed by a NUMERIC entry (the per-document ordinals). Each entry
        /// is prefixed in the stream with the field number and a type tag, which are
        /// validated against the expected values.
        /// </summary>
        /// <param name="fieldNumber">field number each sub-entry must carry</param>
        /// <param name="meta">metadata stream positioned at the sorted entry</param>
        /// <param name="infos">field infos (not consulted here; kept for the reader signature)</param>
        /// <exception cref="CorruptIndexException">if a field-number or type tag does not match</exception>
        private void ReadSortedField(int fieldNumber, IndexInput meta, FieldInfos infos)
        {
            // sorted = binary + numeric
            if (meta.ReadVInt() != fieldNumber)
            {
                // was: throw new Exception(...) — use CorruptIndexException for index corruption,
                // consistent with the other readers in this file
                throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
            {
                throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            BinaryEntry b = ReadBinaryEntry(meta);
            Binaries[fieldNumber] = b;

            if (meta.ReadVInt() != fieldNumber)
            {
                throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
            {
                throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            NumericEntry n = ReadNumericEntry(meta);
            Ords[fieldNumber] = n;
        }
Пример #17
0
        /// <summary>
        /// Verifies that streams opened through a CompoundFileReader behave identically
        /// to streams opened directly on the directory under interleaved random access:
        /// seeks across buffer boundaries on one stream must not disturb the file
        /// pointer or data of the other.
        /// </summary>
        public virtual void  TestRandomAccess()
        {
            SetUp_2();
            CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

            // Open two files: e* are the raw (expected) streams, a* the compound (actual) ones
            IndexInput e1 = dir.OpenInput("f11");
            IndexInput e2 = dir.OpenInput("f3");

            IndexInput a1 = cr.OpenInput("f11");
            // BUG FIX: was dir.OpenInput("f3"), which compared the raw stream with itself
            // and never exercised the compound reader for this file
            IndexInput a2 = cr.OpenInput("f3");

            // Seek the first pair
            e1.Seek(100);
            a1.Seek(100);
            Assert.AreEqual(100, e1.GetFilePointer());
            Assert.AreEqual(100, a1.GetFilePointer());
            byte be1 = e1.ReadByte();
            byte ba1 = a1.ReadByte();

            Assert.AreEqual(be1, ba1);

            // Now seek the second pair
            e2.Seek(1027);
            a2.Seek(1027);
            Assert.AreEqual(1027, e2.GetFilePointer());
            Assert.AreEqual(1027, a2.GetFilePointer());
            byte be2 = e2.ReadByte();
            byte ba2 = a2.ReadByte();

            Assert.AreEqual(be2, ba2);

            // Now make sure the first one didn't move
            Assert.AreEqual(101, e1.GetFilePointer());
            Assert.AreEqual(101, a1.GetFilePointer());
            be1 = e1.ReadByte();
            ba1 = a1.ReadByte();
            Assert.AreEqual(be1, ba1);

            // Now move the first one again, past the buffer length
            e1.Seek(1910);
            a1.Seek(1910);
            Assert.AreEqual(1910, e1.GetFilePointer());
            Assert.AreEqual(1910, a1.GetFilePointer());
            be1 = e1.ReadByte();
            ba1 = a1.ReadByte();
            Assert.AreEqual(be1, ba1);

            // Now make sure the second set didn't move
            Assert.AreEqual(1028, e2.GetFilePointer());
            Assert.AreEqual(1028, a2.GetFilePointer());
            be2 = e2.ReadByte();
            ba2 = a2.ReadByte();
            Assert.AreEqual(be2, ba2);

            // Move the second set back, again cross the buffer size
            e2.Seek(17);
            a2.Seek(17);
            Assert.AreEqual(17, e2.GetFilePointer());
            Assert.AreEqual(17, a2.GetFilePointer());
            be2 = e2.ReadByte();
            ba2 = a2.ReadByte();
            Assert.AreEqual(be2, ba2);

            // Finally, make sure the first set didn't move
            Assert.AreEqual(1911, e1.GetFilePointer());
            Assert.AreEqual(1911, a1.GetFilePointer());
            be1 = e1.ReadByte();
            ba1 = a1.ReadByte();
            Assert.AreEqual(be1, ba1);

            e1.Close();
            e2.Close();
            a1.Close();
            a2.Close();
            cr.Close();
        }
        /// <summary>
        /// Reads the metadata for a sorted-set doc-values field stored with addresses:
        /// a BINARY entry (term bytes), a NUMERIC entry (ordinal addresses), and a
        /// second NUMERIC entry (the ordinal index). Each entry is prefixed with the
        /// field number and a type tag, validated against the expected values.
        /// </summary>
        /// <param name="fieldNumber">field number each sub-entry must carry</param>
        /// <param name="meta">metadata stream positioned at the sortedset entry</param>
        /// <param name="infos">field infos (not consulted here; kept for the reader signature)</param>
        /// <exception cref="CorruptIndexException">if a field-number or type tag does not match</exception>
        private void ReadSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos)
        {
            // sortedset = binary + numeric (addresses) + ordIndex
            if (meta.ReadVInt() != fieldNumber)
            {
                // was: throw new Exception(...) — use CorruptIndexException for index corruption,
                // consistent with the other readers in this file
                throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
            {
                throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            BinaryEntry b = ReadBinaryEntry(meta);
            Binaries[fieldNumber] = b;

            if (meta.ReadVInt() != fieldNumber)
            {
                throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
            {
                throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            NumericEntry n1 = ReadNumericEntry(meta);
            Ords[fieldNumber] = n1;

            if (meta.ReadVInt() != fieldNumber)
            {
                throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
            {
                throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            NumericEntry n2 = ReadNumericEntry(meta);
            OrdIndexes[fieldNumber] = n2;
        }
Пример #19
0
		/// <summary>
		/// Populates this FieldInfos from a previously saved stream. The first VInt is
		/// either a negative format tag (current formats) or, for pre-format files,
		/// the field count itself. After all fields are read the file pointer must be
		/// exactly at EOF, otherwise the file is considered corrupt.
		/// </summary>
		/// <param name="input">stream positioned at the start of the field infos data</param>
		/// <param name="fileName">file name, used only in error messages</param>
		/// <exception cref="CorruptIndexException">on an unrecognized format or trailing bytes</exception>
		private void Read(IndexInput input, System.String fileName)
		{
			int firstInt = input.ReadVInt();
			
			if (firstInt < 0)
			{
				// This is a real format
				format = firstInt;
			}
			else
			{
				// Pre-format files start directly with the (non-negative) field count
				format = FORMAT_PRE;
			}
			
			// use short-circuit && (was non-short-circuit bitwise &) — idiomatic boolean logic
			if (format != FORMAT_PRE && format != FORMAT_START)
			{
				throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
			}
			
			int size;
			if (format == FORMAT_PRE)
			{
				size = firstInt;
			}
			else
			{
				size = input.ReadVInt(); //read in the size
			}
			
			for (int i = 0; i < size; i++)
			{
				System.String name = StringHelper.Intern(input.ReadString());
				// One flag byte per field encodes all per-field options
				byte bits = input.ReadByte();
				bool isIndexed = (bits & IS_INDEXED) != 0;
				bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
				bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
				bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
				bool omitNorms = (bits & OMIT_NORMS) != 0;
				bool storePayloads = (bits & STORE_PAYLOADS) != 0;
				bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
				
				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			}
			
			// Any bytes left over indicate a truncated read or a corrupt file
			if (input.GetFilePointer() != input.Length())
			{
				throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
			}
		}
Пример #20
0
        /// <summary>
        /// Returns the packed value at <paramref name="index"/> by seeking directly to
        /// the bytes that contain it and reading the minimal whole-byte span covering
        /// <c>bitsPerValue</c> bits, then shifting out the trailing extra bits and
        /// masking with <c>ValueMask</c>.
        /// </summary>
        public override long Get(int index)
        {
            // Absolute bit position of the value, and the byte that contains its first bit
            long majorBitPos = (long)index * bitsPerValue;
            long elementPos  = (long)((ulong)majorBitPos >> 3);

            try
            {
                @in.Seek(StartPointer + elementPos);

                int bitPos = (int)(majorBitPos & 7);
                // round up bits to a multiple of 8 to find total bytes needed to read
                int roundedBits = ((bitPos + bitsPerValue + 7) & ~7);
                // the number of extra bits read at the end to shift out
                int shiftRightBits = roundedBits - bitPos - bitsPerValue;

                // Read exactly roundedBits/8 bytes (1..9), assembled big-endian into rawValue
                long rawValue;
                switch ((int)((uint)roundedBits >> 3))
                {
                case 1:
                    rawValue = @in.ReadByte();
                    break;

                case 2:
                    rawValue = @in.ReadShort();
                    break;

                case 3:
                    rawValue = ((long)@in.ReadShort() << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 4:
                    rawValue = @in.ReadInt();
                    break;

                case 5:
                    rawValue = ((long)@in.ReadInt() << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 6:
                    rawValue = ((long)@in.ReadInt() << 16) | (@in.ReadShort() & 0xFFFFL);
                    break;

                case 7:
                    rawValue = ((long)@in.ReadInt() << 24) | ((@in.ReadShort() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 8:
                    rawValue = @in.ReadLong();
                    break;

                case 9:
                    // We must be very careful not to shift out relevant bits. So we account for right shift
                    // we would normally do on return here, and reset it.
                    rawValue       = (@in.ReadLong() << (8 - shiftRightBits)) | ((int)((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits));
                    shiftRightBits = 0;
                    break;

                default:
                    throw new InvalidOperationException("bitsPerValue too large: " + bitsPerValue);
                }
                // Drop the trailing extra bits and keep only bitsPerValue bits
                return(((long)((ulong)rawValue >> shiftRightBits)) & ValueMask);
            }
            catch (System.IO.IOException ioe)
            {
                // Surface I/O failures as state errors since Get cannot declare checked exceptions
                throw new InvalidOperationException("failed", ioe);
            }
        }