public void Read(IndexInput input, FieldInfos fieldInfos)
{
    this.term = null; // invalidate cache
    int start = input.ReadVInt();
    int length = input.ReadVInt();
    int totalLength = start + length;
    if (preUTF8Strings)
    {
        text.SetLength(totalLength);
        input.ReadChars(text.result, start, length);
    }
    else
    {
        if (dirty)
        {
            // Fully convert all bytes since bytes is dirty
            UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length);
            UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
            dirty = false;
        }
        else
        {
            // Incrementally convert only the UTF8 bytes that are new:
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length);
            UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
        }
    }
    this.field = fieldInfos.FieldName(input.ReadVInt());
}
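// A minimal standalone sketch (not part of the Lucene.NET API) of the prefix
// coding that Read() relies on: each term stores only the number of leading
// bytes it shares with the previous term ("start") plus the new suffix bytes.
// The names DecodePrefixCoded/prevTerm are illustrative assumptions.
public static class PrefixCodedSketch
{
    // Rebuilds the current term from the previous term's bytes, the shared
    // prefix length, and the newly read suffix bytes.
    public static byte[] DecodePrefixCoded(byte[] prevTerm, int start, byte[] suffix)
    {
        byte[] term = new byte[start + suffix.Length];
        System.Array.Copy(prevTerm, 0, term, 0, start);            // reuse shared prefix
        System.Array.Copy(suffix, 0, term, start, suffix.Length);  // append new bytes
        return term;
    }
}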
public byte[] GetPayload(byte[] data, int offset)
{
    if (!needToLoadPayload)
    {
        throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
    }

    // read payloads lazily
    byte[] retArray;
    int retOffset;
    if (data == null || data.Length - offset < payloadLength)
    {
        // the array is too small to store the payload data,
        // so we allocate a new one
        retArray = new byte[payloadLength];
        retOffset = 0;
    }
    else
    {
        retArray = data;
        retOffset = offset;
    }
    proxStream.ReadBytes(retArray, retOffset, payloadLength);
    needToLoadPayload = false;
    return retArray;
}
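// A hedged usage sketch showing how GetPayload is normally reached through the
// public TermPositions API; the field and term values ("body", "lucene") are
// made-up examples. Passing null as the buffer lets the reader allocate one of
// the right size.
public static class PayloadSketch
{
    public static void PrintPayloadLengths(Lucene.Net.Index.IndexReader reader)
    {
        Lucene.Net.Index.TermPositions tp =
            reader.TermPositions(new Lucene.Net.Index.Term("body", "lucene"));
        try
        {
            while (tp.Next())
            {
                for (int i = 0; i < tp.Freq(); i++)
                {
                    tp.NextPosition();
                    if (tp.IsPayloadAvailable())
                    {
                        byte[] payload = tp.GetPayload(null, 0); // lazily loads the payload bytes
                        System.Console.WriteLine("payload length: " + payload.Length);
                    }
                }
            }
        }
        finally
        {
            tp.Close();
        }
    }
}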
/// <summary>The value of the field as a String, or null. If null, the Reader value,
/// binary value, or TokenStream value is used. Exactly one of StringValue(),
/// ReaderValue(), BinaryValue(), and TokenStreamValue() must be set.
/// </summary>
public override System.String StringValue()
{
    Enclosing_Instance.EnsureOpen();
    if (isBinary)
    {
        return null;
    }
    else
    {
        if (fieldsData == null)
        {
            IndexInput localFieldsStream = GetFieldStream();
            try
            {
                localFieldsStream.Seek(pointer);
                if (isCompressed)
                {
                    byte[] b = new byte[toRead];
                    localFieldsStream.ReadBytes(b, 0, b.Length);
                    fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                }
                else
                {
                    if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                    {
                        byte[] bytes = new byte[toRead];
                        localFieldsStream.ReadBytes(bytes, 0, toRead);
                        fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
                    }
                    else
                    {
                        // read in chars b/c we already know the length we need to read
                        char[] chars = new char[toRead];
                        localFieldsStream.ReadChars(chars, 0, toRead);
                        fieldsData = new System.String(chars);
                    }
                }
            }
            catch (System.IO.IOException e)
            {
                throw new FieldReaderException(e);
            }
        }
        return (System.String) fieldsData;
    }
}
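// A small hedged sketch of the caller's view of a stored string field: exactly
// one value kind is set per field, so a stored text field comes back through
// Document.Get (which returns null for binary-only or unstored fields). The
// field name "title" is an illustrative assumption.
public static class StringValueSketch
{
    public static string ReadTitle(Lucene.Net.Index.IndexReader reader, int docId)
    {
        Lucene.Net.Documents.Document doc = reader.Document(docId);
        return doc.Get("title"); // null if "title" was not stored as a string
    }
}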
/// <summary>Expert: implements buffer refill. Reads bytes from the current
/// position in the input.
/// </summary>
/// <param name="b">the array to read bytes into
/// </param>
/// <param name="offset">the offset in the array to start storing bytes
/// </param>
/// <param name="len">the number of bytes to read
/// </param>
public override void ReadInternal(byte[] b, int offset, int len)
{
    long start = GetFilePointer();
    if (start + len > length)
    {
        throw new System.IO.IOException("read past EOF");
    }
    base_Renamed.Seek(fileOffset + start);
    base_Renamed.ReadBytes(b, offset, len, false);
}
public override byte[] GetBinaryValue(byte[] result)
{
    Enclosing_Instance.EnsureOpen();

    if (isBinary)
    {
        if (fieldsData == null)
        {
            // Allocate new buffer if result is null or too small
            byte[] b;
            if (result == null || result.Length < toRead)
            {
                b = new byte[toRead];
            }
            else
            {
                b = result;
            }

            IndexInput localFieldsStream = GetFieldStream();

            // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
            // since they are already handling this exception when getting the document
            try
            {
                localFieldsStream.Seek(pointer);
                localFieldsStream.ReadBytes(b, 0, toRead);
                if (isCompressed)
                {
                    fieldsData = Enclosing_Instance.Uncompress(b);
                }
                else
                {
                    fieldsData = b;
                }
            }
            catch (System.IO.IOException e)
            {
                throw new FieldReaderException(e);
            }

            binaryOffset = 0;
            binaryLength = toRead;
        }
        return (byte[]) fieldsData;
    }
    else
    {
        return null;
    }
}
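// A hedged sketch of the public counterpart of GetBinaryValue: reading a stored
// byte[] field back through Document. The field name "thumbnail" is an
// illustrative assumption.
public static class BinaryValueSketch
{
    public static byte[] ReadThumbnail(Lucene.Net.Index.IndexReader reader, int docId)
    {
        Lucene.Net.Documents.Document doc = reader.Document(docId);
        // Returns null when the field is absent or was not stored as binary.
        return doc.GetBinaryValue("thumbnail");
    }
}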
/// <summary>Copy the contents of the file with specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
    IndexInput is_Renamed = null;
    try
    {
        long startPtr = os.GetFilePointer();

        is_Renamed = directory.OpenInput(source.file);
        long length = is_Renamed.Length();
        long remainder = length;
        int chunk = buffer.Length;

        while (remainder > 0)
        {
            int len = (int) System.Math.Min(chunk, remainder);
            is_Renamed.ReadBytes(buffer, 0, len, false);
            os.WriteBytes(buffer, len);
            remainder -= len;
            if (checkAbort != null)
            {
                // Roughly every 2 MB we will check if
                // it's time to abort
                checkAbort.Work(80);
            }
        }

        // Verify that remainder is 0
        if (remainder != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
        }

        // Verify that the output length diff is equal to original file
        long endPtr = os.GetFilePointer();
        long diff = endPtr - startPtr;
        if (diff != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
        }
    }
    finally
    {
        if (is_Renamed != null)
        {
            is_Renamed.Close();
        }
    }
}
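// The same fixed-buffer copy loop as CopyFile, sketched with plain System.IO
// streams so the pattern is visible outside the Directory/IndexInput API.
// The streams and buffer are caller-supplied assumptions for illustration.
public static class ChunkedCopySketch
{
    public static long Copy(System.IO.Stream src, System.IO.Stream dst, byte[] buffer)
    {
        long copied = 0;
        int len;
        // Reuse one caller-supplied buffer instead of allocating per chunk.
        while ((len = src.Read(buffer, 0, buffer.Length)) > 0)
        {
            dst.Write(buffer, 0, len);
            copied += len;
        }
        return copied;
    }
}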
/// <summary>Reads the term vector for a single field from the tvf stream,
/// starting at the given pointer, and feeds the data to the mapper.
/// </summary>
/// <param name="field">The field to read in
/// </param>
/// <param name="tvfPointer">The pointer within the tvf file where we should start reading
/// </param>
/// <param name="mapper">The mapper used to map the TermVector
/// </param>
/// <throws> IOException </throws>
private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
{
    // Now read the data from specified position
    // We don't need to offset by the FORMAT here since the pointer already includes the offset
    tvf.Seek(tvfPointer);

    int numTerms = tvf.ReadVInt();
    // If no terms - return a constant empty termvector. However, this should never occur!
    if (numTerms == 0)
    {
        return;
    }

    bool storePositions;
    bool storeOffsets;

    if (format >= FORMAT_VERSION)
    {
        byte bits = tvf.ReadByte();
        storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
    }
    else
    {
        tvf.ReadVInt();
        storePositions = false;
        storeOffsets = false;
    }
    mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);

    int start = 0;
    int deltaLength = 0;
    int totalLength = 0;
    byte[] byteBuffer;
    char[] charBuffer;
    bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

    // init the buffers
    if (preUTF8)
    {
        charBuffer = new char[10];
        byteBuffer = null;
    }
    else
    {
        charBuffer = null;
        byteBuffer = new byte[20];
    }

    for (int i = 0; i < numTerms; i++)
    {
        start = tvf.ReadVInt();
        deltaLength = tvf.ReadVInt();
        totalLength = start + deltaLength;

        System.String term;

        if (preUTF8)
        {
            // Term stored as java chars
            if (charBuffer.Length < totalLength)
            {
                char[] newCharBuffer = new char[(int) (1.5 * totalLength)];
                Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                charBuffer = newCharBuffer;
            }
            tvf.ReadChars(charBuffer, start, deltaLength);
            term = new System.String(charBuffer, 0, totalLength);
        }
        else
        {
            // Term stored as utf8 bytes
            if (byteBuffer.Length < totalLength)
            {
                byte[] newByteBuffer = new byte[(int) (1.5 * totalLength)];
                Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                byteBuffer = newByteBuffer;
            }
            tvf.ReadBytes(byteBuffer, start, deltaLength);
            term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
        }
        int freq = tvf.ReadVInt();
        int[] positions = null;
        if (storePositions)
        {
            // read in the positions
            // does the mapper even care about positions?
            if (mapper.IsIgnoringPositions() == false)
            {
                positions = new int[freq];
                int prevPosition = 0;
                for (int j = 0; j < freq; j++)
                {
                    positions[j] = prevPosition + tvf.ReadVInt();
                    prevPosition = positions[j];
                }
            }
            else
            {
                // we need to skip over the positions. Since these are VInts,
                // there is no way to know in advance how many bytes to skip.
                for (int j = 0; j < freq; j++)
                {
                    tvf.ReadVInt();
                }
            }
        }
        TermVectorOffsetInfo[] offsets = null;
        if (storeOffsets)
        {
            // does the mapper even care about offsets?
            if (mapper.IsIgnoringOffsets() == false)
            {
                offsets = new TermVectorOffsetInfo[freq];
                int prevOffset = 0;
                for (int j = 0; j < freq; j++)
                {
                    int startOffset = prevOffset + tvf.ReadVInt();
                    int endOffset = startOffset + tvf.ReadVInt();
                    offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                    prevOffset = endOffset;
                }
            }
            else
            {
                for (int j = 0; j < freq; j++)
                {
                    tvf.ReadVInt();
                    tvf.ReadVInt();
                }
            }
        }
        mapper.Map(term, freq, offsets, positions);
    }
}
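// A hedged sketch of the consumer side: a trivial TermVectorMapper that only
// records term frequencies, driven through IndexReader.GetTermFreqVector. The
// field name "body" and the dictionary shape are illustrative assumptions.
public class FreqOnlyMapper : Lucene.Net.Index.TermVectorMapper
{
    public System.Collections.Generic.Dictionary<string, int> Freqs =
        new System.Collections.Generic.Dictionary<string, int>();

    public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
    {
        // Nothing to prepare; positions and offsets are simply ignored below.
    }

    public override void Map(string term, int frequency, Lucene.Net.Index.TermVectorOffsetInfo[] offsets, int[] positions)
    {
        Freqs[term] = frequency;
    }

    public static FreqOnlyMapper Run(Lucene.Net.Index.IndexReader reader, int docId)
    {
        FreqOnlyMapper mapper = new FreqOnlyMapper();
        reader.GetTermFreqVector(docId, "body", mapper);
        return mapper;
    }
}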
/// <summary>Read as a bit set </summary>
private void ReadBits(IndexInput input)
{
    count = input.ReadInt(); // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    input.ReadBytes(bits, 0, bits.Length);
}
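// A tiny sketch of the sizing arithmetic in ReadBits: "size" is a bit count,
// so (size >> 3) + 1 bytes is always enough to hold it (with one spare byte
// when size is a multiple of 8), matching the writer's allocation.
public static class BitSizingSketch
{
    public static int BytesForBits(int numBits)
    {
        return (numBits >> 3) + 1; // e.g. 0..7 bits -> 1 byte, 8 bits -> 2 bytes
    }
}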
internal SkipBuffer(IndexInput input, int length)
{
    data = new byte[length];
    pointer = input.GetFilePointer();
    input.ReadBytes(data, 0, length);
}