Exemple #1
0
        public void  Read(IndexInput input, FieldInfos fieldInfos, IState state)
        {
            this.term = null; // invalidate cache
            int start       = input.ReadVInt(state);
            int length      = input.ReadVInt(state);
            int totalLength = start + length;

            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length, state);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length, state);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length, state);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt(state));
        }
        public byte[] GetPayload(byte[] data, int offset, IState state)
        {
            if (!needToLoadPayload)
            {
                throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
            }

            // read payloads lazily
            byte[] retArray;
            int    retOffset;

            if (data == null || data.Length - offset < payloadLength)
            {
                // the array is too small to store the payload data,
                // so we allocate a new one
                retArray  = new byte[payloadLength];
                retOffset = 0;
            }
            else
            {
                retArray  = data;
                retOffset = offset;
            }
            proxStream.ReadBytes(retArray, retOffset, payloadLength, state);
            needToLoadPayload = false;
            return(retArray);
        }
Exemple #3
0
            /// <summary>The value of the field as a String, or null.  If null, the Reader value,
            /// binary value, or TokenStream value is used.  Exactly one of StringValue(),
            /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
            /// </summary>
            public override string StringValue(IState state)
            {
                Enclosing_Instance.EnsureOpen();
                if (internalIsBinary)
                {
                    return(null);
                }

                if (fieldsData == null)
                {
                    IndexInput localFieldsStream = GetFieldStream(state);
                    try
                    {
                        localFieldsStream.Seek(pointer, state);
                        if (isCompressed)
                        {
                            var b = new byte[toRead];
                            localFieldsStream.ReadBytes(b, 0, b.Length, state);
                            fieldsData =
                                System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                        }
                        else
                        {
                            if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                            {
                                var bytes = new byte[toRead];
                                localFieldsStream.ReadBytes(bytes, 0, toRead, state);
                                fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
                            }
                            else
                            {
                                //read in chars b/c we already know the length we need to read
                                var chars = new char[toRead];
                                localFieldsStream.ReadChars(chars, 0, toRead, state);
                                fieldsData = new System.String(chars);
                            }
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        throw new FieldReaderException(e);
                    }
                }
                return((System.String)fieldsData);
            }
Exemple #4
0
            /// <summary>Expert: implements buffer refill.  Reads bytes from the current
            /// position in the input.
            /// </summary>
            /// <param name="b">the array to read bytes into
            /// </param>
            /// <param name="offset">the offset in the array to start storing bytes
            /// </param>
            /// <param name="len">the number of bytes to read
            /// </param>
            public override void  ReadInternal(byte[] b, int offset, int len, IState state)
            {
                long start = FilePointer(state);

                if (start + len > length)
                {
                    throw new System.IO.IOException("read past EOF");
                }
                base_Renamed.Seek(fileOffset + start, state);
                base_Renamed.ReadBytes(b, offset, len, false, state);
            }
Exemple #5
0
        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize, IState state)
        {
            //we have a binary stored field, and it may be compressed
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt(state);
                var b      = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length, state);
                doc.Add(compressed ? new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES));
            }
            else
            {
                const Field.Store store      = Field.Store.YES;
                Field.Index       index      = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector  termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                {
                    int toRead = fieldsStream.ReadVInt(state);

                    var b = new byte[toRead];
                    fieldsStream.ReadBytes(b, 0, b.Length, state);
                    f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index,
                                  termVector)
                    {
                        OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms
                    };
                }
                else
                {
                    f = new Field(fi.name, false, fieldsStream.ReadString(state), store, index, termVector)
                    {
                        OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms
                    };
                }

                doc.Add(f);
            }
        }
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer, IState state)
        {
            IndexInput isRenamed = null;

            try
            {
                long startPtr = os.FilePointer;

                isRenamed = directory.OpenInput(source.file, state);
                long length    = isRenamed.Length(state);
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    var len = (int)Math.Min(chunk, remainder);
                    isRenamed.ReadBytes(buffer, 0, len, false, state);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80, state);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.FilePointer;
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (isRenamed != null)
                {
                    isRenamed.Close();
                }
            }
        }
 /// <summary>Copy numBytes bytes from input to ourself. </summary>
 public virtual void CopyBytes(IndexInput input, long numBytes)
 {
     long left = numBytes;
     if (copyBuffer == null)
         copyBuffer = new byte[COPY_BUFFER_SIZE];
     while (left > 0)
     {
         int toCopy;
         if (left > COPY_BUFFER_SIZE)
             toCopy = COPY_BUFFER_SIZE;
         else
             toCopy = (int) left;
         input.ReadBytes(copyBuffer, 0, toCopy);
         WriteBytes(copyBuffer, 0, toCopy);
         left -= toCopy;
     }
 }
 /// <summary>Copy numBytes bytes from input to ourself. </summary>
 public virtual void CopyBytes(IndexInput input, long numBytes)
 {
     System.Diagnostics.Debug.Assert(numBytes >= 0, "numBytes=" + numBytes);
     long left = numBytes;
     if (copyBuffer == null)
         copyBuffer = new byte[COPY_BUFFER_SIZE];
     while (left > 0)
     {
         int toCopy;
         if (left > COPY_BUFFER_SIZE)
             toCopy = COPY_BUFFER_SIZE;
         else
             toCopy = (int) left;
         input.ReadBytes(copyBuffer, 0, toCopy);
         WriteBytes(copyBuffer, 0, toCopy);
         left -= toCopy;
     }
 }
Exemple #9
0
            public override byte[] GetBinaryValue(byte[] result, IState state)
            {
                Enclosing_Instance.EnsureOpen();

                if (internalIsBinary)
                {
                    if (fieldsData == null)
                    {
                        // Allocate new buffer if result is null or too small
                        byte[] b;
                        if (result == null || result.Length < toRead)
                        {
                            b = new byte[toRead];
                        }
                        else
                        {
                            b = result;
                        }

                        IndexInput localFieldsStream = GetFieldStream(state);

                        // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
                        // since they are already handling this exception when getting the document
                        try
                        {
                            localFieldsStream.Seek(pointer, state);
                            localFieldsStream.ReadBytes(b, 0, toRead, state);
                            fieldsData = isCompressed ? Enclosing_Instance.Uncompress(b) : b;
                        }
                        catch (IOException e)
                        {
                            throw new FieldReaderException(e);
                        }

                        internalbinaryOffset = 0;
                        internalBinaryLength = toRead;
                    }

                    return((byte[])fieldsData);
                }
                return(null);
            }
 public override void ReadBytes(byte[] b, int offset, int len)
 {
     main.ReadBytes(b, offset, len);
     digest.Update(b, offset, len);
 }
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper, IState state)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer, state);

            int numTerms = tvf.ReadVInt(state);

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (format >= FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte(state);
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt(state);
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt(state);
                deltaLength = tvf.ReadVInt(state);
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength, state);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength, state);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt(state);
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt(state);
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt(state);
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt(state);
                            int endOffset   = startOffset + tvf.ReadVInt(state);
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt(state);
                            tvf.ReadVInt(state);
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
		private void  CheckReadBytes(IndexInput input, int size, int pos)
		{
			// Just to see that "offset" is treated properly in readBytes(), we
			// add an arbitrary offset at the beginning of the array
			int offset = size % 10; // arbitrary
			buffer = ArrayUtil.Grow(buffer, offset + size);
			Assert.AreEqual(pos, input.FilePointer);
			long left = TEST_FILE_LENGTH - input.FilePointer;
			if (left <= 0)
			{
				return ;
			}
			else if (left < size)
			{
				size = (int) left;
			}
			input.ReadBytes(buffer, offset, size);
			Assert.AreEqual(pos + size, input.FilePointer);
			for (int i = 0; i < size; i++)
			{
				Assert.AreEqual(Byten(pos + i), buffer[offset + i], "pos=" + i + " filepos=" + (pos + i));
			}
		}
Exemple #13
0
        public virtual void TestCopyBytesMem()
        {
            int num = AtLeast(10);

            for (int iter = 0; iter < num; iter++)
            {
                Directory dir = NewDirectory();
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter + " dir=" + dir);
                }

                // make random file
                IndexOutput @out     = dir.CreateOutput("test", NewIOContext(Random()));
                var         bytes    = new byte[TestUtil.NextInt(Random(), 1, 77777)];
                int         size     = TestUtil.NextInt(Random(), 1, 1777777);
                int         upto     = 0;
                int         byteUpto = 0;
                while (upto < size)
                {
                    bytes[byteUpto++] = Value(upto);
                    upto++;
                    if (byteUpto == bytes.Length)
                    {
                        @out.WriteBytes(bytes, 0, bytes.Length);
                        byteUpto = 0;
                    }
                }

                @out.WriteBytes(bytes, 0, byteUpto);
                Assert.AreEqual(size, @out.GetFilePointer());
                @out.Dispose();
                Assert.AreEqual(size, dir.FileLength("test"));

                // copy from test -> test2
                IndexInput @in = dir.OpenInput("test", NewIOContext(Random()));

                @out = dir.CreateOutput("test2", NewIOContext(Random()));

                upto = 0;
                while (upto < size)
                {
                    if (Random().NextBoolean())
                    {
                        @out.WriteByte(@in.ReadByte());
                        upto++;
                    }
                    else
                    {
                        int chunk = Math.Min(TestUtil.NextInt(Random(), 1, bytes.Length), size - upto);
                        @out.CopyBytes(@in, chunk);
                        upto += chunk;
                    }
                }
                Assert.AreEqual(size, upto);
                @out.Dispose();
                @in.Dispose();

                // verify
                IndexInput in2 = dir.OpenInput("test2", NewIOContext(Random()));
                upto = 0;
                while (upto < size)
                {
                    if (Random().NextBoolean())
                    {
                        var v = in2.ReadByte();
                        Assert.AreEqual(Value(upto), v);
                        upto++;
                    }
                    else
                    {
                        int limit = Math.Min(TestUtil.NextInt(Random(), 1, bytes.Length), size - upto);
                        in2.ReadBytes(bytes, 0, limit);
                        for (int byteIdx = 0; byteIdx < limit; byteIdx++)
                        {
                            Assert.AreEqual(Value(upto), bytes[byteIdx]);
                            upto++;
                        }
                    }
                }
                in2.Dispose();

                dir.DeleteFile("test");
                dir.DeleteFile("test2");

                dir.Dispose();
            }
        }
 internal SkipBuffer(IndexInput input, int length, IState state)
 {
     data    = new byte[length];
     pointer = input.FilePointer(state);
     input.ReadBytes(data, 0, length, state);
 }
Exemple #15
0
 public override void  ReadInternal(byte[] b, int offset, int length, IState state)
 {
     SimOutage();
     delegate_Renamed.ReadBytes(b, offset, length, null);
 }
Exemple #16
0
 /// <summary>Read as a bit set </summary>
 private void  ReadBits(IndexInput input, IState state)
 {
     count = input.ReadInt(state);             // read count
     bits  = new byte[(size >> 3) + 1];        // allocate bits
     input.ReadBytes(bits, 0, bits.Length, state);
 }
Exemple #17
0
 public override void ReadBytes(byte[] b, int offset, int len)
 {
     EnsureOpen();
     @delegate.ReadBytes(b, offset, len);
 }
Exemple #18
0
        /// <summary>
        /// Read the next block of data (<code>For</code> format).
        /// </summary>
        /// <param name="in">        the input to use to read data </param>
        /// <param name="encoded">   a buffer that can be used to store encoded data </param>
        /// <param name="decoded">   where to write decoded data </param>
        /// <exception cref="IOException"> If there is a low-level I/O error </exception>
        public void ReadBlock(IndexInput @in, sbyte[] encoded, int[] decoded)
        {
            int numBits = @in.ReadByte();
            Debug.Assert(numBits <= 32, numBits.ToString());

            if (numBits == ALL_VALUES_EQUAL)
            {
                int value = @in.ReadVInt();
                CollectionsHelper.Fill(decoded, 0, Lucene41PostingsFormat.BLOCK_SIZE, value);
                return;
            }

            int encodedSize = EncodedSizes[numBits];
            @in.ReadBytes(encoded, 0, encodedSize);

            PackedInts.Decoder decoder = Decoders[numBits];
            int iters = Iterations[numBits];
            Debug.Assert(iters * decoder.ByteValueCount() >= Lucene41PostingsFormat.BLOCK_SIZE);

            decoder.Decode(encoded, 0, decoded, 0, iters);
        }
        private void AssertSameStreams(string msg, IndexInput expected, IndexInput test)
        {
            Assert.IsNotNull(expected, msg + " null expected");
            Assert.IsNotNull(test, msg + " null test");
            Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
            Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");

            var expectedBuffer = new byte[512];
            var testBuffer = new byte[expectedBuffer.Length];

            long remainder = expected.Length() - expected.FilePointer;
            while (remainder > 0)
            {
                int readLen = (int)Math.Min(remainder, expectedBuffer.Length);
                expected.ReadBytes(expectedBuffer, 0, readLen);
                test.ReadBytes(testBuffer, 0, readLen);
                AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
                remainder -= readLen;
            }
        }
Exemple #20
0
        public virtual void TestDirectInstantiation()
        {
            DirectoryInfo path = CreateTempDir("testDirectInstantiation");

            byte[] largeBuffer = new byte[Random.Next(256 * 1024)], largeReadBuffer = new byte[largeBuffer.Length];
            for (int i = 0; i < largeBuffer.Length; i++)
            {
                largeBuffer[i] = (byte)i; // automatically loops with modulo
            }

            var dirs = new FSDirectory[] { new SimpleFSDirectory(path, null), new NIOFSDirectory(path, null), new MMapDirectory(path, null) };

            for (int i = 0; i < dirs.Length; i++)
            {
                FSDirectory dir = dirs[i];
                dir.EnsureOpen();
                string      fname    = "foo." + i;
                string      lockname = "foo" + i + ".lck";
                IndexOutput @out     = dir.CreateOutput(fname, NewIOContext(Random));
                @out.WriteByte((byte)(sbyte)i);
                @out.WriteBytes(largeBuffer, largeBuffer.Length);
                @out.Dispose();

                for (int j = 0; j < dirs.Length; j++)
                {
                    FSDirectory d2 = dirs[j];
                    d2.EnsureOpen();
                    Assert.IsTrue(SlowFileExists(d2, fname));
                    Assert.AreEqual(1 + largeBuffer.Length, d2.FileLength(fname));

                    // LUCENENET specific - unmap hack not needed
                    //// don't do read tests if unmapping is not supported!
                    //if (d2 is MMapDirectory && !((MMapDirectory)d2).UseUnmap)
                    //{
                    //    continue;
                    //}

                    IndexInput input = d2.OpenInput(fname, NewIOContext(Random));
                    Assert.AreEqual((byte)i, input.ReadByte());
                    // read array with buffering enabled
                    Arrays.Fill(largeReadBuffer, (byte)0);
                    input.ReadBytes(largeReadBuffer, 0, largeReadBuffer.Length, true);
                    Assert.AreEqual(largeBuffer, largeReadBuffer);
                    // read again without using buffer
                    input.Seek(1L);
                    Arrays.Fill(largeReadBuffer, (byte)0);
                    input.ReadBytes(largeReadBuffer, 0, largeReadBuffer.Length, false);
                    Assert.AreEqual(largeBuffer, largeReadBuffer);
                    input.Dispose();
                }

                // delete with a different dir
                dirs[(i + 1) % dirs.Length].DeleteFile(fname);

                for (int j = 0; j < dirs.Length; j++)
                {
                    FSDirectory d2 = dirs[j];
                    Assert.IsFalse(SlowFileExists(d2, fname));
                }

                Lock @lock = dir.MakeLock(lockname);
                Assert.IsTrue(@lock.Obtain());

                for (int j = 0; j < dirs.Length; j++)
                {
                    FSDirectory d2    = dirs[j];
                    Lock        lock2 = d2.MakeLock(lockname);
                    try
                    {
                        Assert.IsFalse(lock2.Obtain(1));
                    }
#pragma warning disable 168
                    catch (LockObtainFailedException e)
#pragma warning restore 168
                    {
                        // OK
                    }
                }

                @lock.Dispose();

                // now lock with different dir
                @lock = dirs[(i + 1) % dirs.Length].MakeLock(lockname);
                Assert.IsTrue(@lock.Obtain());
                @lock.Dispose();
            }

            for (int i = 0; i < dirs.Length; i++)
            {
                FSDirectory dir = dirs[i];
                dir.EnsureOpen();
                dir.Dispose();
                Assert.IsFalse(dir.IsOpen);
            }
        }