Ejemplo n.º 1
0
        public void  Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null;             // invalidate cache
            int start       = input.ReadVInt();
            int length      = input.ReadVInt();
            int totalLength = start + length;

            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
Ejemplo n.º 2
0
        public byte[] GetPayload(byte[] data, int offset)
        {
            if (!needToLoadPayload)
            {
                throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
            }

            // read payloads lazily
            byte[] retArray;
            int    retOffset;

            if (data == null || data.Length - offset < payloadLength)
            {
                // the array is too small to store the payload data,
                // so we allocate a new one
                retArray  = new byte[payloadLength];
                retOffset = 0;
            }
            else
            {
                retArray  = data;
                retOffset = offset;
            }
            proxStream.ReadBytes(retArray, retOffset, payloadLength);
            needToLoadPayload = false;
            return(retArray);
        }
Ejemplo n.º 3
0
 /// <summary>The value of the field as a String, or null.  If null, the Reader value,
 /// binary value, or TokenStream value is used.  Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override System.String StringValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (isBinary)
     {
         return(null);
     }
     else
     {
         if (fieldsData == null)
         {
             IndexInput localFieldsStream = GetFieldStream();
             try
             {
                 localFieldsStream.Seek(pointer);
                 if (isCompressed)
                 {
                     byte[] b = new byte[toRead];
                     localFieldsStream.ReadBytes(b, 0, b.Length);
                     fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                 }
                 else
                 {
                     if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                     {
                         byte[] bytes = new byte[toRead];
                         localFieldsStream.ReadBytes(bytes, 0, toRead);
                         fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
                     }
                     else
                     {
                         //read in chars b/c we already know the length we need to read
                         char[] chars = new char[toRead];
                         localFieldsStream.ReadChars(chars, 0, toRead);
                         fieldsData = new System.String(chars);
                     }
                 }
             }
             catch (System.IO.IOException e)
             {
                 throw new FieldReaderException(e);
             }
         }
         return((System.String)fieldsData);
     }
 }
Ejemplo n.º 4
0
            /// <summary>Expert: implements buffer refill.  Reads bytes from the current
            /// position in the input.
            /// </summary>
            /// <param name="b">the array to read bytes into
            /// </param>
            /// <param name="offset">the offset in the array to start storing bytes
            /// </param>
            /// <param name="len">the number of bytes to read
            /// </param>
            public override void  ReadInternal(byte[] b, int offset, int len)
            {
                long start = GetFilePointer();

                if (start + len > length)
                {
                    throw new System.IO.IOException("read past EOF");
                }
                base_Renamed.Seek(fileOffset + start);
                base_Renamed.ReadBytes(b, offset, len, false);
            }
Ejemplo n.º 5
0
            public override byte[] GetBinaryValue(byte[] result)
            {
                Enclosing_Instance.EnsureOpen();

                if (isBinary)
                {
                    if (fieldsData == null)
                    {
                        // Allocate new buffer if result is null or too small
                        byte[] b;
                        if (result == null || result.Length < toRead)
                        {
                            b = new byte[toRead];
                        }
                        else
                        {
                            b = result;
                        }

                        IndexInput localFieldsStream = GetFieldStream();

                        // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
                        // since they are already handling this exception when getting the document
                        try
                        {
                            localFieldsStream.Seek(pointer);
                            localFieldsStream.ReadBytes(b, 0, toRead);
                            if (isCompressed == true)
                            {
                                fieldsData = Enclosing_Instance.Uncompress(b);
                            }
                            else
                            {
                                fieldsData = b;
                            }
                        }
                        catch (System.IO.IOException e)
                        {
                            throw new FieldReaderException(e);
                        }

                        binaryOffset = 0;
                        binaryLength = toRead;
                    }

                    return((byte[])fieldsData);
                }
                else
                {
                    return(null);
                }
            }
Ejemplo n.º 6
0
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput is_Renamed = null;

            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenInput(source.file);
                long length    = is_Renamed.Length();
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int)System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (is_Renamed != null)
                {
                    is_Renamed.Close();
                }
            }
        }
Ejemplo n.º 7
0
		public void  Read(IndexInput input, FieldInfos fieldInfos)
		{
			this.term = null; // invalidate cache
			int start = input.ReadVInt();
			int length = input.ReadVInt();
			int totalLength = start + length;
			if (preUTF8Strings)
			{
				text.SetLength(totalLength);
				input.ReadChars(text.result, start, length);
			}
			else
			{
				
				if (dirty)
				{
					// Fully convert all bytes since bytes is dirty
					UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
					dirty = false;
				}
				else
				{
					// Incrementally convert only the UTF8 bytes that are new:
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
				}
			}
			this.field = fieldInfos.FieldName(input.ReadVInt());
		}
Ejemplo n.º 8
0
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (format >= FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
Ejemplo n.º 9
0
 /// <summary>Read as a bit set </summary>
 private void  ReadBits(IndexInput input)
 {
     count = input.ReadInt();             // read count
     bits  = new byte[(size >> 3) + 1];   // allocate bits
     input.ReadBytes(bits, 0, bits.Length);
 }
Ejemplo n.º 10
0
		/// <summary>Read as a bit set </summary>
		private void  ReadBits(IndexInput input)
		{
			count = input.ReadInt(); // read count
			bits = new byte[(size >> 3) + 1]; // allocate bits
			input.ReadBytes(bits, 0, bits.Length);
		}
Ejemplo n.º 11
0
 internal SkipBuffer(IndexInput input, int length)
 {
     data    = new byte[length];
     pointer = input.GetFilePointer();
     input.ReadBytes(data, 0, length);
 }
Ejemplo n.º 12
0
			internal SkipBuffer(IndexInput input, int length)
			{
				data = new byte[length];
				pointer = input.GetFilePointer();
				input.ReadBytes(data, 0, length);
			}