public void Read(IndexInput input, FieldInfos fieldInfos, IState state)
{
    this.term = null; // invalidate cache
    int start = input.ReadVInt(state);
    int length = input.ReadVInt(state);
    int totalLength = start + length;
    if (preUTF8Strings)
    {
        text.SetLength(totalLength);
        input.ReadChars(text.result, start, length, state);
    }
    else
    {
        if (dirty)
        {
            // Fully convert all bytes since bytes is dirty
            UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length, state);
            UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
            dirty = false;
        }
        else
        {
            // Incrementally convert only the UTF8 bytes that are new:
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length, state);
            UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
        }
    }
    this.field = fieldInfos.FieldName(input.ReadVInt(state));
}
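// Sketch (self-contained, not the Lucene.NET API): the prefix-delta scheme
// decoded above. Each serialized term stores (sharedPrefixLen, suffixLen,
// suffixBytes), so only the tail that differs from the previous term is read;
// Read7BitInt mirrors ReadVInt's variable-length encoding.
static string DecodeNextTerm(System.IO.BinaryReader reader, ref byte[] buffer)
{
    int start = Read7BitInt(reader);   // bytes shared with the previous term
    int length = Read7BitInt(reader);  // new suffix bytes to read
    int total = start + length;
    if (buffer.Length < total)
        System.Array.Resize(ref buffer, total);
    reader.Read(buffer, start, length); // overwrite only the changed tail
    return System.Text.Encoding.UTF8.GetString(buffer, 0, total);
}

static int Read7BitInt(System.IO.BinaryReader reader)
{
    int value = 0, shift = 0;
    byte b;
    do
    {
        b = reader.ReadByte();
        value |= (b & 0x7F) << shift;
        shift += 7;
    } while ((b & 0x80) != 0);
    return value;
}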
public byte[] GetPayload(byte[] data, int offset, IState state)
{
    if (!needToLoadPayload)
    {
        throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
    }

    // read payloads lazily
    byte[] retArray;
    int retOffset;
    if (data == null || data.Length - offset < payloadLength)
    {
        // the array is too small to store the payload data,
        // so we allocate a new one
        retArray = new byte[payloadLength];
        retOffset = 0;
    }
    else
    {
        retArray = data;
        retOffset = offset;
    }
    proxStream.ReadBytes(retArray, retOffset, payloadLength, state);
    needToLoadPayload = false;
    return retArray;
}
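// Usage sketch (hypothetical caller, not from the source): consuming payloads
// lazily through the TermPositions surface. Member names (Next, Freq,
// NextPosition, IsPayloadAvailable, GetPayload) are assumed to follow the
// stock Lucene.NET 3.x API; the IState-threaded fork above adds a state
// argument to each call.
private static void ReadAllPayloads(IndexReader reader)
{
    using (TermPositions tp = reader.TermPositions(new Term("body", "lucene")))
    {
        byte[] scratch = null; // reused across positions when large enough
        while (tp.Next())
        {
            for (int i = 0; i < tp.Freq; i++)
            {
                tp.NextPosition();
                if (tp.IsPayloadAvailable)
                {
                    // GetPayload returns a fresh array if scratch is too small,
                    // and may be called at most once per position (see the
                    // needToLoadPayload guard above)
                    scratch = tp.GetPayload(scratch, 0);
                }
            }
        }
    }
}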
/// <summary>The value of the field as a String, or null. If null, the Reader value,
/// binary value, or TokenStream value is used. Exactly one of StringValue(),
/// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
/// </summary>
public override string StringValue(IState state)
{
    Enclosing_Instance.EnsureOpen();
    if (internalIsBinary)
        return null;

    if (fieldsData == null)
    {
        IndexInput localFieldsStream = GetFieldStream(state);
        try
        {
            localFieldsStream.Seek(pointer, state);
            if (isCompressed)
            {
                var b = new byte[toRead];
                localFieldsStream.ReadBytes(b, 0, b.Length, state);
                fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
            }
            else
            {
                if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                {
                    var bytes = new byte[toRead];
                    localFieldsStream.ReadBytes(bytes, 0, toRead, state);
                    fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
                }
                else
                {
                    // read in chars b/c we already know the length we need to read
                    var chars = new char[toRead];
                    localFieldsStream.ReadChars(chars, 0, toRead, state);
                    fieldsData = new System.String(chars);
                }
            }
        }
        catch (System.IO.IOException e)
        {
            throw new FieldReaderException(e);
        }
    }
    return (System.String) fieldsData;
}
/// <summary>Expert: implements buffer refill. Reads bytes from the current
/// position in the input.
/// </summary>
/// <param name="b">the array to read bytes into</param>
/// <param name="offset">the offset in the array to start storing bytes</param>
/// <param name="len">the number of bytes to read</param>
public override void ReadInternal(byte[] b, int offset, int len, IState state)
{
    long start = FilePointer(state);
    if (start + len > length)
        throw new System.IO.IOException("read past EOF");
    base_Renamed.Seek(fileOffset + start, state);
    base_Renamed.ReadBytes(b, offset, len, false, state);
}
private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize, IState state)
{
    // we have a binary stored field, and it may be compressed
    if (binary)
    {
        int toRead = fieldsStream.ReadVInt(state);
        var b = new byte[toRead];
        fieldsStream.ReadBytes(b, 0, b.Length, state);
        doc.Add(compressed
            ? new Field(fi.name, Uncompress(b), Field.Store.YES)
            : new Field(fi.name, b, Field.Store.YES));
    }
    else
    {
        const Field.Store store = Field.Store.YES;
        Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
        Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

        AbstractField f;
        if (compressed)
        {
            int toRead = fieldsStream.ReadVInt(state);
            var b = new byte[toRead];
            fieldsStream.ReadBytes(b, 0, b.Length, state);
            f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector)
            {
                OmitTermFreqAndPositions = fi.omitTermFreqAndPositions,
                OmitNorms = fi.omitNorms
            };
        }
        else
        {
            f = new Field(fi.name, false, fieldsStream.ReadString(state), store, index, termVector)
            {
                OmitTermFreqAndPositions = fi.omitTermFreqAndPositions,
                OmitNorms = fi.omitNorms
            };
        }
        doc.Add(f);
    }
}
/// <summary>Copy the contents of the file with specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer, IState state)
{
    IndexInput isRenamed = null;
    try
    {
        long startPtr = os.FilePointer;

        isRenamed = directory.OpenInput(source.file, state);
        long length = isRenamed.Length(state);
        long remainder = length;
        int chunk = buffer.Length;

        while (remainder > 0)
        {
            var len = (int) Math.Min(chunk, remainder);
            isRenamed.ReadBytes(buffer, 0, len, false, state);
            os.WriteBytes(buffer, len);
            remainder -= len;
            if (checkAbort != null)
            {
                // Roughly every 2 MB we will check if it's time to abort
                checkAbort.Work(80, state);
            }
        }

        // Verify that remainder is 0
        if (remainder != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder +
                                            " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
        }

        // Verify that the output length diff is equal to original file
        long endPtr = os.FilePointer;
        long diff = endPtr - startPtr;
        if (diff != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + diff +
                                            " does not match the original file length " + length);
        }
    }
    finally
    {
        if (isRenamed != null)
            isRenamed.Close();
    }
}
/// <summary>Copy numBytes bytes from input to ourself. </summary>
public virtual void CopyBytes(IndexInput input, long numBytes)
{
    long left = numBytes;
    if (copyBuffer == null)
        copyBuffer = new byte[COPY_BUFFER_SIZE];
    while (left > 0)
    {
        int toCopy;
        if (left > COPY_BUFFER_SIZE)
            toCopy = COPY_BUFFER_SIZE;
        else
            toCopy = (int) left;
        input.ReadBytes(copyBuffer, 0, toCopy);
        WriteBytes(copyBuffer, 0, toCopy);
        left -= toCopy;
    }
}
/// <summary>Copy numBytes bytes from input to ourself. </summary>
public virtual void CopyBytes(IndexInput input, long numBytes)
{
    System.Diagnostics.Debug.Assert(numBytes >= 0, "numBytes=" + numBytes);
    long left = numBytes;
    if (copyBuffer == null)
        copyBuffer = new byte[COPY_BUFFER_SIZE];
    while (left > 0)
    {
        int toCopy;
        if (left > COPY_BUFFER_SIZE)
            toCopy = COPY_BUFFER_SIZE;
        else
            toCopy = (int) left;
        input.ReadBytes(copyBuffer, 0, toCopy);
        WriteBytes(copyBuffer, 0, toCopy);
        left -= toCopy;
    }
}
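// Usage sketch (hypothetical caller, not from the source): the two CopyBytes
// overloads above differ only in the debug assertion; both stream through a
// reusable copyBuffer so arbitrarily large copies need no large allocation.
// srcDir, dstDir, and files are illustrative; the state-threaded overloads of
// OpenInput/Length are omitted for brevity.
private static void CopyFiles(Directory srcDir, Directory dstDir, System.Collections.Generic.IEnumerable<string> files)
{
    foreach (string file in files)
    {
        IndexInput input = srcDir.OpenInput(file);
        IndexOutput output = dstDir.CreateOutput(file);
        try
        {
            // delegate chunking to the output's internal copy buffer
            output.CopyBytes(input, input.Length());
        }
        finally
        {
            output.Close();
            input.Close();
        }
    }
}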
public override byte[] GetBinaryValue(byte[] result, IState state)
{
    Enclosing_Instance.EnsureOpen();

    if (internalIsBinary)
    {
        if (fieldsData == null)
        {
            // Allocate new buffer if result is null or too small
            byte[] b;
            if (result == null || result.Length < toRead)
                b = new byte[toRead];
            else
                b = result;

            IndexInput localFieldsStream = GetFieldStream(state);

            // Throw this IOException since IndexReader.document does so anyway, so probably not that
            // big of a change for people since they are already handling this exception when getting
            // the document
            try
            {
                localFieldsStream.Seek(pointer, state);
                localFieldsStream.ReadBytes(b, 0, toRead, state);
                fieldsData = isCompressed ? Enclosing_Instance.Uncompress(b) : b;
            }
            catch (IOException e)
            {
                throw new FieldReaderException(e);
            }

            internalbinaryOffset = 0;
            internalBinaryLength = toRead;
        }
        return (byte[]) fieldsData;
    }
    return null;
}
public override void ReadBytes(byte[] b, int offset, int len)
{
    main.ReadBytes(b, offset, len);
    digest.Update(b, offset, len);
}
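// Sketch (self-contained, not from the source): the decorator pattern above —
// forward the read, fold the bytes into a running digest — keeps checksum
// verification transparent to callers. This standalone version wraps a plain
// System.IO.Stream; the digest is an Adler-32-style rolling sum purely for
// illustration, whereas Lucene's ChecksumIndexInput uses a CRC32 digest.
class ChecksumStream : System.IO.Stream
{
    private readonly System.IO.Stream main;
    private uint a = 1, b = 0; // Adler-32 state

    public ChecksumStream(System.IO.Stream main) { this.main = main; }

    public uint Checksum => (b << 16) | a;

    public override int Read(byte[] buffer, int offset, int count)
    {
        int read = main.Read(buffer, offset, count);
        for (int i = 0; i < read; i++)
        {
            a = (a + buffer[offset + i]) % 65521;
            b = (b + a) % 65521;
        }
        return read;
    }

    // Remaining Stream members: this wrapper is read-only and non-seekable.
    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => main.Length;
    public override long Position { get => main.Position; set => throw new System.NotSupportedException(); }
    public override void Flush() { }
    public override long Seek(long offset, System.IO.SeekOrigin origin) => throw new System.NotSupportedException();
    public override void SetLength(long value) => throw new System.NotSupportedException();
    public override void Write(byte[] buffer, int offset, int count) => throw new System.NotSupportedException();
}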
/// <summary>Reads a term vector for a single field from the tvf stream and feeds it to the mapper.</summary>
/// <param name="field">The field to read in</param>
/// <param name="tvfPointer">The pointer within the tvf file where we should start reading</param>
/// <param name="mapper">The mapper used to map the TermVector</param>
/// <throws>  IOException </throws>
private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper, IState state)
{
    // Now read the data from the specified position.
    // We don't need to offset by the FORMAT here since the pointer already includes the offset.
    tvf.Seek(tvfPointer, state);

    int numTerms = tvf.ReadVInt(state);
    //System.out.println("Num Terms: " + numTerms);
    // If no terms - return a constant empty termvector. However, this should never occur!
    if (numTerms == 0)
        return;

    bool storePositions;
    bool storeOffsets;
    if (format >= FORMAT_VERSION)
    {
        byte bits = tvf.ReadByte(state);
        storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
    }
    else
    {
        tvf.ReadVInt(state);
        storePositions = false;
        storeOffsets = false;
    }
    mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);

    int start = 0;
    int deltaLength = 0;
    int totalLength = 0;
    byte[] byteBuffer;
    char[] charBuffer;
    bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

    // init the buffers
    if (preUTF8)
    {
        charBuffer = new char[10];
        byteBuffer = null;
    }
    else
    {
        charBuffer = null;
        byteBuffer = new byte[20];
    }

    for (int i = 0; i < numTerms; i++)
    {
        start = tvf.ReadVInt(state);
        deltaLength = tvf.ReadVInt(state);
        totalLength = start + deltaLength;

        System.String term;
        if (preUTF8)
        {
            // Term stored as java chars
            if (charBuffer.Length < totalLength)
            {
                char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                charBuffer = newCharBuffer;
            }
            tvf.ReadChars(charBuffer, start, deltaLength, state);
            term = new System.String(charBuffer, 0, totalLength);
        }
        else
        {
            // Term stored as UTF-8 bytes
            if (byteBuffer.Length < totalLength)
            {
                byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                byteBuffer = newByteBuffer;
            }
            tvf.ReadBytes(byteBuffer, start, deltaLength, state);
            term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
        }

        int freq = tvf.ReadVInt(state);

        int[] positions = null;
        if (storePositions)
        {
            // read in the positions, but only if the mapper cares about them
            if (mapper.IsIgnoringPositions == false)
            {
                positions = new int[freq];
                int prevPosition = 0;
                for (int j = 0; j < freq; j++)
                {
                    positions[j] = prevPosition + tvf.ReadVInt(state);
                    prevPosition = positions[j];
                }
            }
            else
            {
                // We still need to skip over the positions. Since they are VInts,
                // there is no way to know how far to seek, so read them one at a time.
                for (int j = 0; j < freq; j++)
                {
                    tvf.ReadVInt(state);
                }
            }
        }

        TermVectorOffsetInfo[] offsets = null;
        if (storeOffsets)
        {
            // does the mapper even care about offsets?
            if (mapper.IsIgnoringOffsets == false)
            {
                offsets = new TermVectorOffsetInfo[freq];
                int prevOffset = 0;
                for (int j = 0; j < freq; j++)
                {
                    int startOffset = prevOffset + tvf.ReadVInt(state);
                    int endOffset = startOffset + tvf.ReadVInt(state);
                    offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                    prevOffset = endOffset;
                }
            }
            else
            {
                for (int j = 0; j < freq; j++)
                {
                    tvf.ReadVInt(state);
                    tvf.ReadVInt(state);
                }
            }
        }

        mapper.Map(term, freq, offsets, positions);
    }
}
private void CheckReadBytes(IndexInput input, int size, int pos)
{
    // Just to see that "offset" is treated properly in readBytes(), we
    // add an arbitrary offset at the beginning of the array
    int offset = size % 10; // arbitrary
    buffer = ArrayUtil.Grow(buffer, offset + size);
    Assert.AreEqual(pos, input.FilePointer);
    long left = TEST_FILE_LENGTH - input.FilePointer;
    if (left <= 0)
    {
        return;
    }
    else if (left < size)
    {
        size = (int) left;
    }
    input.ReadBytes(buffer, offset, size);
    Assert.AreEqual(pos + size, input.FilePointer);
    for (int i = 0; i < size; i++)
    {
        Assert.AreEqual(Byten(pos + i), buffer[offset + i], "pos=" + i + " filepos=" + (pos + i));
    }
}
public virtual void TestCopyBytesMem()
{
    int num = AtLeast(10);
    for (int iter = 0; iter < num; iter++)
    {
        Directory dir = NewDirectory();
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + iter + " dir=" + dir);
        }

        // make random file
        IndexOutput @out = dir.CreateOutput("test", NewIOContext(Random()));
        var bytes = new byte[TestUtil.NextInt(Random(), 1, 77777)];
        int size = TestUtil.NextInt(Random(), 1, 1777777);
        int upto = 0;
        int byteUpto = 0;
        while (upto < size)
        {
            bytes[byteUpto++] = Value(upto);
            upto++;
            if (byteUpto == bytes.Length)
            {
                @out.WriteBytes(bytes, 0, bytes.Length);
                byteUpto = 0;
            }
        }
        @out.WriteBytes(bytes, 0, byteUpto);
        Assert.AreEqual(size, @out.GetFilePointer());
        @out.Dispose();
        Assert.AreEqual(size, dir.FileLength("test"));

        // copy from test -> test2
        IndexInput @in = dir.OpenInput("test", NewIOContext(Random()));
        @out = dir.CreateOutput("test2", NewIOContext(Random()));
        upto = 0;
        while (upto < size)
        {
            if (Random().NextBoolean())
            {
                @out.WriteByte(@in.ReadByte());
                upto++;
            }
            else
            {
                int chunk = Math.Min(TestUtil.NextInt(Random(), 1, bytes.Length), size - upto);
                @out.CopyBytes(@in, chunk);
                upto += chunk;
            }
        }
        Assert.AreEqual(size, upto);
        @out.Dispose();
        @in.Dispose();

        // verify
        IndexInput in2 = dir.OpenInput("test2", NewIOContext(Random()));
        upto = 0;
        while (upto < size)
        {
            if (Random().NextBoolean())
            {
                var v = in2.ReadByte();
                Assert.AreEqual(Value(upto), v);
                upto++;
            }
            else
            {
                int limit = Math.Min(TestUtil.NextInt(Random(), 1, bytes.Length), size - upto);
                in2.ReadBytes(bytes, 0, limit);
                for (int byteIdx = 0; byteIdx < limit; byteIdx++)
                {
                    Assert.AreEqual(Value(upto), bytes[byteIdx]);
                    upto++;
                }
            }
        }
        in2.Dispose();

        dir.DeleteFile("test");
        dir.DeleteFile("test2");
        dir.Dispose();
    }
}
internal SkipBuffer(IndexInput input, int length, IState state)
{
    data = new byte[length];
    pointer = input.FilePointer(state);
    input.ReadBytes(data, 0, length, state);
}
public override void ReadInternal(byte[] b, int offset, int length, IState state)
{
    SimOutage();
    delegate_Renamed.ReadBytes(b, offset, length, null);
}
/// <summary>Read as a bit set </summary>
private void ReadBits(IndexInput input, IState state)
{
    count = input.ReadInt(state);      // read count
    bits = new byte[(size >> 3) + 1];  // allocate bits
    input.ReadBytes(bits, 0, bits.Length, state);
}
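// Sketch (hypothetical, not from the source): the write side implied by
// ReadBits. The on-disk layout is a 32-bit set-bit count followed by the
// packed bit bytes; the (size >> 3) + 1 allocation above rounds the byte
// count up, with one spare byte when size is a multiple of 8.
private void WriteBits(IndexOutput output)
{
    output.WriteInt(count);               // number of set bits
    output.WriteBytes(bits, bits.Length); // (size >> 3) + 1 packed bytes
}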
public override void ReadBytes(byte[] b, int offset, int len)
{
    EnsureOpen();
    @delegate.ReadBytes(b, offset, len);
}
/// <summary>
/// Read the next block of data (<code>For</code> format).
/// </summary>
/// <param name="in"> the input to use to read data </param>
/// <param name="encoded"> a buffer that can be used to store encoded data </param>
/// <param name="decoded"> where to write decoded data </param>
/// <exception cref="IOException"> If there is a low-level I/O error </exception>
public void ReadBlock(IndexInput @in, sbyte[] encoded, int[] decoded)
{
    int numBits = @in.ReadByte();
    Debug.Assert(numBits <= 32, numBits.ToString());

    if (numBits == ALL_VALUES_EQUAL)
    {
        int value = @in.ReadVInt();
        CollectionsHelper.Fill(decoded, 0, Lucene41PostingsFormat.BLOCK_SIZE, value);
        return;
    }

    int encodedSize = EncodedSizes[numBits];
    @in.ReadBytes(encoded, 0, encodedSize);

    PackedInts.Decoder decoder = Decoders[numBits];
    int iters = Iterations[numBits];
    Debug.Assert(iters * decoder.ByteValueCount() >= Lucene41PostingsFormat.BLOCK_SIZE);
    decoder.Decode(encoded, 0, decoded, 0, iters);
}
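// Sketch (self-contained, not the PackedInts decoder used above): the core
// idea of the For (Frame of Reference) format is to pack a fixed-size block
// at the smallest bit width that fits its largest value, recorded in the
// one-byte numBits header read above (or ALL_VALUES_EQUAL plus a single VInt
// for constant blocks). Helper names here are illustrative.
static int ChooseNumBits(int[] block)
{
    int max = 0;
    foreach (int v in block)
        max = Math.Max(max, v);
    int bits = 1;
    while ((1L << bits) <= max)
        bits++;
    return bits;
}

static byte[] Pack(int[] block, int numBits)
{
    var packed = new byte[(block.Length * numBits + 7) / 8];
    int bitPos = 0;
    foreach (int v in block)
        for (int b = 0; b < numBits; b++, bitPos++)
            if ((v & (1 << b)) != 0)
                packed[bitPos >> 3] |= (byte)(1 << (bitPos & 7));
    return packed;
}

static int[] Unpack(byte[] packed, int count, int numBits)
{
    var block = new int[count];
    int bitPos = 0;
    for (int i = 0; i < count; i++)
        for (int b = 0; b < numBits; b++, bitPos++)
            if ((packed[bitPos >> 3] & (1 << (bitPos & 7))) != 0)
                block[i] |= 1 << b;
    return block;
}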
private void AssertSameStreams(string msg, IndexInput expected, IndexInput test)
{
    Assert.IsNotNull(expected, msg + " null expected");
    Assert.IsNotNull(test, msg + " null test");
    Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
    Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");

    var expectedBuffer = new byte[512];
    var testBuffer = new byte[expectedBuffer.Length];

    long remainder = expected.Length() - expected.FilePointer;
    while (remainder > 0)
    {
        int readLen = (int) Math.Min(remainder, expectedBuffer.Length);
        expected.ReadBytes(expectedBuffer, 0, readLen);
        test.ReadBytes(testBuffer, 0, readLen);
        AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
        remainder -= readLen;
    }
}
public virtual void TestDirectInstantiation()
{
    DirectoryInfo path = CreateTempDir("testDirectInstantiation");

    byte[] largeBuffer = new byte[Random.Next(256 * 1024)], largeReadBuffer = new byte[largeBuffer.Length];
    for (int i = 0; i < largeBuffer.Length; i++)
    {
        largeBuffer[i] = (byte)i; // automatically loops with modulo
    }

    var dirs = new FSDirectory[] { new SimpleFSDirectory(path, null), new NIOFSDirectory(path, null), new MMapDirectory(path, null) };

    for (int i = 0; i < dirs.Length; i++)
    {
        FSDirectory dir = dirs[i];
        dir.EnsureOpen();
        string fname = "foo." + i;
        string lockname = "foo" + i + ".lck";
        IndexOutput @out = dir.CreateOutput(fname, NewIOContext(Random));
        @out.WriteByte((byte)(sbyte)i);
        @out.WriteBytes(largeBuffer, largeBuffer.Length);
        @out.Dispose();

        for (int j = 0; j < dirs.Length; j++)
        {
            FSDirectory d2 = dirs[j];
            d2.EnsureOpen();
            Assert.IsTrue(SlowFileExists(d2, fname));
            Assert.AreEqual(1 + largeBuffer.Length, d2.FileLength(fname));

            // LUCENENET specific - unmap hack not needed
            //// don't do read tests if unmapping is not supported!
            //if (d2 is MMapDirectory && !((MMapDirectory)d2).UseUnmap)
            //{
            //    continue;
            //}

            IndexInput input = d2.OpenInput(fname, NewIOContext(Random));
            Assert.AreEqual((byte)i, input.ReadByte());

            // read array with buffering enabled
            Arrays.Fill(largeReadBuffer, (byte)0);
            input.ReadBytes(largeReadBuffer, 0, largeReadBuffer.Length, true);
            Assert.AreEqual(largeBuffer, largeReadBuffer);

            // read again without using buffer
            input.Seek(1L);
            Arrays.Fill(largeReadBuffer, (byte)0);
            input.ReadBytes(largeReadBuffer, 0, largeReadBuffer.Length, false);
            Assert.AreEqual(largeBuffer, largeReadBuffer);

            input.Dispose();
        }

        // delete with a different dir
        dirs[(i + 1) % dirs.Length].DeleteFile(fname);

        for (int j = 0; j < dirs.Length; j++)
        {
            FSDirectory d2 = dirs[j];
            Assert.IsFalse(SlowFileExists(d2, fname));
        }

        Lock @lock = dir.MakeLock(lockname);
        Assert.IsTrue(@lock.Obtain());

        for (int j = 0; j < dirs.Length; j++)
        {
            FSDirectory d2 = dirs[j];
            Lock lock2 = d2.MakeLock(lockname);
            try
            {
                Assert.IsFalse(lock2.Obtain(1));
            }
#pragma warning disable 168
            catch (LockObtainFailedException e)
#pragma warning restore 168
            {
                // OK
            }
        }

        @lock.Dispose();

        // now lock with different dir
        @lock = dirs[(i + 1) % dirs.Length].MakeLock(lockname);
        Assert.IsTrue(@lock.Obtain());
        @lock.Dispose();
    }

    for (int i = 0; i < dirs.Length; i++)
    {
        FSDirectory dir = dirs[i];
        dir.EnsureOpen();
        dir.Dispose();
        Assert.IsFalse(dir.IsOpen);
    }
}