Example #1
        public void  Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null;             // invalidate cache
            int start       = input.ReadVInt();
            int length      = input.ReadVInt();
            int totalLength = start + length;

            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
        private void  ReadFile(Directory dir, System.String name)
        {
            IndexInput input = dir.OpenInput(name);

            try
            {
                long size      = dir.FileLength(name);
                long bytesLeft = size;
                while (bytesLeft > 0)
                {
                    int numToRead;
                    if (bytesLeft < buffer.Length)
                    {
                        numToRead = (int)bytesLeft;
                    }
                    else
                    {
                        numToRead = buffer.Length;
                    }
                    input.ReadBytes(buffer, 0, numToRead, false);
                    bytesLeft -= numToRead;
                }
                // Don't do this in your real backups!  This is just
                // to force a backup to take a somewhat long time, to
                // make sure we are exercising the fact that the
                // IndexWriter should not delete this file even when I
                // take my time reading it.
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1));
            }
            finally
            {
                input.Close();
            }
        }
        public byte[] GetPayload(byte[] data, int offset)
        {
            if (!needToLoadPayload)
            {
                throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
            }

            // read payloads lazily
            byte[] retArray;
            int    retOffset;

            if (data == null || data.Length - offset < payloadLength)
            {
                // the array is too small to store the payload data,
                // so we allocate a new one
                retArray  = new byte[payloadLength];
                retOffset = 0;
            }
            else
            {
                retArray  = data;
                retOffset = offset;
            }
            proxStream.ReadBytes(retArray, retOffset, payloadLength);
            needToLoadPayload = false;
            return(retArray);
        }
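Most of the examples on this page reduce to the same pattern: open an IndexInput from a Directory, pull bytes into a caller-supplied buffer with ReadBytes, and close the input in a finally block. Below is a minimal sketch of that pattern; the 1024-byte chunk size and the ReadAllBytes name are placeholder assumptions, not taken from any example here.

        // Minimal sketch: read an entire file through IndexInput.ReadBytes in fixed-size chunks.
        // Buffer size and method name are illustrative; consume buffer[0..len) as needed per chunk.
        private static void ReadAllBytes(Directory dir, System.String name)
        {
            IndexInput input = dir.OpenInput(name);
            try
            {
                byte[] buffer    = new byte[1024];
                long   remainder = input.Length();
                while (remainder > 0)
                {
                    int len = (int) System.Math.Min(buffer.Length, remainder);
                    input.ReadBytes(buffer, 0, len);     // fills buffer[0..len)
                    remainder -= len;
                }
            }
            finally
            {
                input.Close();
            }
        }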
Example #4
        /// <summary>Read norms into a pre-allocated array. </summary>
        public override void  Norms(System.String field, byte[] bytes, int offset)
        {
            lock (this)
            {
                Norm norm = (Norm)norms[field];
                if (norm == null)
                {
                    Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
                    return;
                }

                if (norm.bytes != null)
                {
                    // can copy from cache
                    Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
                    return;
                }

                IndexInput normStream = (IndexInput)norm.in_Renamed.Clone();
                try
                {
                    // read from disk
                    normStream.Seek(0);
                    normStream.ReadBytes(bytes, offset, MaxDoc());
                }
                finally
                {
                    normStream.Close();
                }
            }
        }
Example #5
 /// <summary>The value of the field in Binary, or null.  If null, the Reader value,
 /// String value, or TokenStream value is used. Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override byte[] BinaryValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (fieldsData == null)
     {
         byte[]     b = new byte[toRead];
         IndexInput localFieldsStream = GetFieldStream();
         //Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
         //since they are already handling this exception when getting the document
         try
         {
             localFieldsStream.Seek(pointer);
             localFieldsStream.ReadBytes(b, 0, b.Length);
             if (isCompressed == true)
             {
                 fieldsData = Enclosing_Instance.Uncompress(b);
             }
             else
             {
                 fieldsData = b;
             }
         }
         catch (System.IO.IOException e)
         {
             throw new FieldReaderException(e);
         }
     }
     return(fieldsData is byte[] ? (byte[])fieldsData : null);
 }
Example #6
 /// <summary>The value of the field as a String, or null.  If null, the Reader value,
 /// binary value, or TokenStream value is used.  Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override System.String StringValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (fieldsData == null)
     {
         IndexInput localFieldsStream = GetFieldStream();
         try
         {
             localFieldsStream.Seek(pointer);
             if (isCompressed)
             {
                 byte[] b = new byte[toRead];
                 localFieldsStream.ReadBytes(b, 0, b.Length);
                 fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
             }
             else
             {
                 //read in chars b/c we already know the length we need to read
                 char[] chars = new char[toRead];
                 localFieldsStream.ReadChars(chars, 0, toRead);
                 fieldsData = new System.String(chars);
             }
         }
         catch (System.IO.IOException e)
         {
             throw new FieldReaderException(e);
         }
     }
     return(fieldsData is System.String ? (System.String)fieldsData : null);
 }
Example #7
 /// <summary>The value of the field as a String, or null.  If null, the Reader value,
 /// binary value, or TokenStream value is used.  Exactly one of stringValue(),
 /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
 /// </summary>
 public override System.String StringValue()
 {
     Enclosing_Instance.EnsureOpen();
     if (isBinary)
     {
         return(null);
     }
     else
     {
         if (fieldsData == null)
         {
             IndexInput localFieldsStream = GetFieldStream();
             try
             {
                 localFieldsStream.Seek(pointer);
                 if (isCompressed)
                 {
                     byte[] b = new byte[toRead];
                     localFieldsStream.ReadBytes(b, 0, b.Length);
                     fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                 }
                 else
                 {
                     if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                     {
                         byte[] bytes = new byte[toRead];
                         localFieldsStream.ReadBytes(bytes, 0, toRead);
                         fieldsData = System.Text.Encoding.UTF8.GetString(bytes);
                     }
                     else
                     {
                         //read in chars b/c we already know the length we need to read
                         char[] chars = new char[toRead];
                         localFieldsStream.ReadChars(chars, 0, toRead);
                         fieldsData = new System.String(chars);
                     }
                 }
             }
             catch (System.IO.IOException e)
             {
                 throw new FieldReaderException(e);
             }
         }
     }
     return((string)fieldsData);
 }
Example #8
        // in merge mode we don't uncompress the data of a compressed field
        private void  AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            System.Object data;

            if (binary || compressed)
            {
                int    toRead = fieldsStream.ReadVInt();
                byte[] b      = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                data = b;
            }
            else
            {
                data = fieldsStream.ReadString();
            }

            doc.Add(new FieldForMerge(data, fi, binary, compressed, tokenize));
        }
Example #9
        public virtual void  TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileReader cr         = new CompoundFileReader(dir, "f.comp", null);
            IndexInput         is_Renamed = cr.OpenInput("f2", null);

            is_Renamed.Seek(is_Renamed.Length(null) - 10, null);
            byte[] b = new byte[100];
            is_Renamed.ReadBytes(b, 0, 10, null);

            Assert.Throws <System.IO.IOException>(() => is_Renamed.ReadByte(null), "Single byte read past end of file");

            is_Renamed.Seek(is_Renamed.Length(null) - 10, null);
            Assert.Throws <System.IO.IOException>(() => is_Renamed.ReadBytes(b, 0, 50, null), "Block read past end of file");

            is_Renamed.Close();
            cr.Close();
        }
Example #10
            /// <summary>Expert: implements buffer refill.  Reads bytes from the current
            /// position in the input.
            /// </summary>
            /// <param name="b">the array to read bytes into
            /// </param>
            /// <param name="offset">the offset in the array to start storing bytes
            /// </param>
            /// <param name="len">the number of bytes to read
            /// </param>
            public override void  ReadInternal(byte[] b, int offset, int len)
            {
                long start = GetFilePointer();

                if (start + len > length)
                {
                    throw new System.IO.IOException("read past EOF");
                }
                base_Renamed.Seek(fileOffset + start);
                base_Renamed.ReadBytes(b, offset, len, false);
            }
Example #11
        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            //we have a binary stored field, and it may be compressed
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                var b      = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                doc.Add(compressed ? new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES));
            }
            else
            {
                const Field.Store store      = Field.Store.YES;
                Field.Index       index      = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
                Field.TermVector  termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

                AbstractField f;
                if (compressed)
                {
                    int toRead = fieldsStream.ReadVInt();

                    var b = new byte[toRead];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index,
                                  termVector)
                    {
                        OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms
                    };
                }
                else
                {
                    f = new Field(fi.name, false, fieldsStream.ReadString(), store, index, termVector)
                    {
                        OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms
                    };
                }

                doc.Add(f);
            }
        }
Example #12
            public override byte[] GetBinaryValue(byte[] result)
            {
                Enclosing_Instance.EnsureOpen();

                if (isBinary)
                {
                    if (fieldsData == null)
                    {
                        // Allocate new buffer if result is null or too small
                        byte[] b;
                        if (result == null || result.Length < toRead)
                        {
                            b = new byte[toRead];
                        }
                        else
                        {
                            b = result;
                        }

                        IndexInput localFieldsStream = GetFieldStream();

                        // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
                        // since they are already handling this exception when getting the document
                        try
                        {
                            localFieldsStream.Seek(pointer);
                            localFieldsStream.ReadBytes(b, 0, toRead);
                            if (isCompressed == true)
                            {
                                fieldsData = Enclosing_Instance.Uncompress(b);
                            }
                            else
                            {
                                fieldsData = b;
                            }
                        }
                        catch (System.IO.IOException e)
                        {
                            throw new FieldReaderException(e);
                        }

                        binaryOffset = 0;
                        binaryLength = toRead;
                    }

                    return((byte[])fieldsData);
                }
                else
                {
                    return(null);
                }
            }
Example #13
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput is_Renamed = null;

            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenInput(source.file);
                long length    = is_Renamed.Length();
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int)System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (is_Renamed != null)
                {
                    is_Renamed.Close();
                }
            }
        }
Example #14
        public virtual void  TestReadPastEOF()
        {
            SetUp_2();
            CompoundFileReader cr         = new CompoundFileReader(dir, "f.comp");
            IndexInput         is_Renamed = cr.OpenInput("f2");

            is_Renamed.Seek(is_Renamed.Length() - 10);
            byte[] b = new byte[100];
            is_Renamed.ReadBytes(b, 0, 10);

            try
            {
                byte test = is_Renamed.ReadByte();
                Assert.Fail("Single byte read past end of file");
            }
            catch (System.IO.IOException e)
            {
                /* success */
                //System.out.println("SUCCESS: single byte read past end of file: " + e);
            }

            is_Renamed.Seek(is_Renamed.Length() - 10);
            try
            {
                is_Renamed.ReadBytes(b, 0, 50);
                Assert.Fail("Block read past end of file");
            }
            catch (System.IO.IOException e)
            {
                /* success */
                //System.out.println("SUCCESS: block read past end of file: " + e);
            }

            is_Renamed.Close();
            cr.Close();
        }
Example #15
        /// <summary>Constructs a bit vector from the file <code>name</code> in Directory
        /// <code>d</code>, as written by the {@link #write} method.
        /// </summary>
        public BitVector(Directory d, System.String name)
        {
            IndexInput input = d.OpenInput(name);

            try
            {
                size  = input.ReadInt();                // read size
                count = input.ReadInt();                // read count
                bits  = new byte[(size >> 3) + 1];      // allocate bits
                input.ReadBytes(bits, 0, bits.Length);  // read bits
            }
            finally
            {
                input.Close();
            }
        }
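The constructor above implies the on-disk layout: a 4-byte size, a 4-byte count, then the packed bits array. For orientation only, here is a hedged sketch of what the matching write side could look like; IndexOutput.WriteInt is assumed (only ReadInt appears on this page), and the real {@link #write} method may differ.

        // Hedged sketch of a write side matching the layout read above (size, count, bits).
        // IndexOutput.WriteInt is an assumption; WriteBytes(byte[], int) is used as in the CopyFile examples.
        public void Write(Directory d, System.String name)
        {
            IndexOutput output = d.CreateOutput(name);
            try
            {
                output.WriteInt(size);                     // write size
                output.WriteInt(count);                    // write count
                output.WriteBytes(bits, bits.Length);      // write bits
            }
            finally
            {
                output.Close();
            }
        }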
Example #16
        public virtual void  CopyFile(Directory dir, System.String src, System.String dest)
        {
            IndexInput  in_Renamed  = dir.OpenInput(src);
            IndexOutput out_Renamed = dir.CreateOutput(dest);

            byte[] b         = new byte[1024];
            long   remainder = in_Renamed.Length();

            while (remainder > 0)
            {
                int len = (int)System.Math.Min(b.Length, remainder);
                in_Renamed.ReadBytes(b, 0, len);
                out_Renamed.WriteBytes(b, len);
                remainder -= len;
            }
            in_Renamed.Close();
            out_Renamed.Close();
        }
 /** Copy numBytes from srcIn to destIn */
 void copyBytes(IndexInput srcIn, IndexOutput destIn, long numBytes)
 {
     // TODO: we could do this more efficiently (save a copy)
     // because it's always from a ByteSliceReader ->
     // IndexOutput
     while (numBytes > 0)
     {
         int chunk;
         if (numBytes > 4096)
         {
             chunk = 4096;
         }
         else
         {
             chunk = (int)numBytes;
         }
         srcIn.ReadBytes(copyByteBuffer, 0, chunk);
         destIn.WriteBytes(copyByteBuffer, 0, chunk);
         numBytes -= chunk;
     }
 }
Example #18
        private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput test)
        {
            Assert.IsNotNull(expected, msg + " null expected");
            Assert.IsNotNull(test, msg + " null test");
            Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
            Assert.AreEqual(expected.GetFilePointer(), test.GetFilePointer(), msg + " position");

            byte[] expectedBuffer = new byte[512];
            byte[] testBuffer     = new byte[expectedBuffer.Length];

            long remainder = expected.Length() - expected.GetFilePointer();

            while (remainder > 0)
            {
                int readLen = (int)System.Math.Min(remainder, expectedBuffer.Length);
                expected.ReadBytes(expectedBuffer, 0, readLen);
                test.ReadBytes(testBuffer, 0, readLen);
                AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
                remainder -= readLen;
            }
        }
Example #19
        public static void  Main(System.String[] args)
        {
            System.String filename = null;
            bool          extract  = false;

            for (int i = 0; i < args.Length; ++i)
            {
                if (args[i].Equals("-extract"))
                {
                    extract = true;
                }
                else if (filename == null)
                {
                    filename = args[i];
                }
            }

            if (filename == null)
            {
                System.Console.Out.WriteLine("Usage: Lucene.Net.index.IndexReader [-extract] <cfsfile>");
                return;
            }

            Directory          dir = null;
            CompoundFileReader cfr = null;

            try
            {
                System.IO.FileInfo file    = new System.IO.FileInfo(filename);
                System.String      dirname = new System.IO.FileInfo(file.FullName).DirectoryName;
                filename = file.Name;
                dir      = FSDirectory.GetDirectory(dirname, false);
                cfr      = new CompoundFileReader(dir, filename);

                System.String[] files = cfr.List();
                System.Array.Sort(files);                 // sort the array of filenames so that the output is more readable

                for (int i = 0; i < files.Length; ++i)
                {
                    long len = cfr.FileLength(files[i]);

                    if (extract)
                    {
                        System.Console.Out.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                        IndexInput ii = cfr.OpenInput(files[i]);

                        System.IO.FileStream f = new System.IO.FileStream(files[i], System.IO.FileMode.Create);

                        // read and write with a small buffer, which is more effective than reading byte by byte
                        byte[] buffer = new byte[1024];
                        int    chunk  = buffer.Length;
                        while (len > 0)
                        {
                            int bufLen = (int)System.Math.Min(chunk, len);
                            ii.ReadBytes(buffer, 0, bufLen);

                            byte[] byteArray = new byte[buffer.Length];
                            for (int index = 0; index < buffer.Length; index++)
                            {
                                byteArray[index] = (byte)buffer[index];
                            }

                            f.Write(byteArray, 0, bufLen);

                            len -= bufLen;
                        }

                        f.Close();
                        ii.Close();
                    }
                    else
                    {
                        System.Console.Out.WriteLine(files[i] + ": " + len + " bytes");
                    }
                }
            }
            catch (System.IO.IOException ioe)
            {
                System.Console.Error.WriteLine(ioe.StackTrace);
            }
            finally
            {
                try
                {
                    if (dir != null)
                    {
                        dir.Close();
                    }
                    if (cfr != null)
                    {
                        cfr.Close();
                    }
                }
                catch (System.IO.IOException ioe)
                {
                    System.Console.Error.WriteLine(ioe.StackTrace);
                }
            }
        }
Example #20
 private NumericDocValues LoadByteField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
     int valueSize = input.ReadInt();
     if (valueSize != 1)
     {
         throw new CorruptIndexException("invalid valueSize: " + valueSize);
     }
     int maxDoc = State.SegmentInfo.DocCount;
     var values = new byte[maxDoc];
     input.ReadBytes(values, 0, values.Length);
     RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
     return new NumericDocValuesAnonymousInnerClassHelper3(values);
 }
 internal SkipBuffer(IndexInput input, int length)
 {
     data    = new byte[length];
     pointer = input.GetFilePointer();
     input.ReadBytes(data, 0, length);
 }
 internal SkipBuffer(IndexInput input, int length)
     : base("SkipBuffer on " + input)
 {
     Data = new byte[length];
     Pointer = input.FilePointer;
     input.ReadBytes(Data, 0, length);
 }
 /** Copy numBytes from srcIn to destIn */
 void copyBytes(IndexInput srcIn, IndexOutput destIn, long numBytes)
 {
     // TODO: we could do this more efficiently (save a copy)
     // because it's always from a ByteSliceReader ->
     // IndexOutput
     while (numBytes > 0)
     {
         int chunk;
         if (numBytes > 4096)
             chunk = 4096;
         else
             chunk = (int)numBytes;
         srcIn.ReadBytes(copyByteBuffer, 0, chunk);
         destIn.WriteBytes(copyByteBuffer, 0, chunk);
         numBytes -= chunk;
     }
 }
Example #24
 /// <summary>Read as a bit set </summary>
 private void ReadBits(IndexInput input)
 {
     count = input.ReadInt(); // read count
     bits = new byte[(size >> 3) + 1]; // allocate bits
     input.ReadBytes(bits, 0, bits.Length);
 }
Example #25
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (format >= FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is any way to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
Example #26
 public void Read(IndexInput input, FieldInfos fieldInfos)
 {
     this.Term = null; // invalidate cache
     NewSuffixStart = input.ReadVInt();
     int length = input.ReadVInt();
     int totalLength = NewSuffixStart + length;
     Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
     if (Bytes.Bytes.Length < totalLength)
     {
         Bytes.Grow(totalLength);
     }
     Bytes.Length = totalLength;
     input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);
     int fieldNumber = input.ReadVInt();
     if (fieldNumber != CurrentFieldNumber)
     {
         CurrentFieldNumber = fieldNumber;
         // NOTE: too much sneakiness here, seriously this is a negative vint?!
         if (CurrentFieldNumber == -1)
         {
             Field = "";
         }
         else
         {
             Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
             Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
         }
     }
     else
     {
         Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name), "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " + (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
     }
 }
 internal SkipBuffer(IndexInput input, int length)
 {
     data = new byte[length];
     pointer = input.GetFilePointer();
     input.ReadBytes(data, 0, length);
 }
Example #28
        public void Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null; // invalidate cache
            int start = input.ReadVInt();
            int length = input.ReadVInt();
            int totalLength = start + length;
            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {

                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
Example #29
 /// <summary>
 /// Read this many bytes from in </summary>
 public void Copy(IndexInput @in, long byteCount)
 {
     while (byteCount > 0)
     {
         int left = BlockSize - Upto;
         if (left == 0)
         {
             if (CurrentBlock != null)
             {
                 Blocks.Add(CurrentBlock);
                 BlockEnd.Add(Upto);
             }
             CurrentBlock = new byte[BlockSize];
             Upto = 0;
             left = BlockSize;
         }
         if (left < byteCount)
         {
             @in.ReadBytes(CurrentBlock, Upto, left, false);
             Upto = BlockSize;
             byteCount -= left;
         }
         else
         {
             @in.ReadBytes(CurrentBlock, Upto, (int)byteCount, false);
             Upto += (int)byteCount;
             break;
         }
     }
 }
Example #30
        public /*internal*/ Document Doc(int n)
        {
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int       fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi          = fieldInfos.FieldInfo(fieldNumber);

                byte bits = fieldsStream.ReadByte();

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

                if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    if (compressed)
                    {
                        doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                    }
                    else
                    {
                        doc.Add(new Field(fi.name, b, Field.Store.YES));
                    }
                }
                else
                {
                    Field.Index index;
                    Field.Store store = Field.Store.YES;

                    if (fi.isIndexed && tokenize)
                    {
                        index = Field.Index.TOKENIZED;
                    }
                    else if (fi.isIndexed && !tokenize)
                    {
                        index = Field.Index.UN_TOKENIZED;
                    }
                    else
                    {
                        index = Field.Index.NO;
                    }

                    Field.TermVector termVector = null;
                    if (fi.storeTermVector)
                    {
                        if (fi.storeOffsetWithTermVector)
                        {
                            if (fi.storePositionWithTermVector)
                            {
                                termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                            }
                            else
                            {
                                termVector = Field.TermVector.WITH_OFFSETS;
                            }
                        }
                        else if (fi.storePositionWithTermVector)
                        {
                            termVector = Field.TermVector.WITH_POSITIONS;
                        }
                        else
                        {
                            termVector = Field.TermVector.YES;
                        }
                    }
                    else
                    {
                        termVector = Field.TermVector.NO;
                    }

                    if (compressed)
                    {
                        store = Field.Store.COMPRESS;
                        byte[] b = new byte[fieldsStream.ReadVInt()];
                        fieldsStream.ReadBytes(b, 0, b.Length);
                        Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                    else
                    {
                        Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                }
            }

            return(doc);
        }
Example #31
 private void  AssertSameStreams(System.String msg, IndexInput expected, IndexInput test)
 {
     Assert.IsNotNull(expected, msg + " null expected");
     Assert.IsNotNull(test, msg + " null test");
     Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
     Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");
     
     byte[] expectedBuffer = new byte[512];
     byte[] testBuffer = new byte[expectedBuffer.Length];
     
     long remainder = expected.Length() - expected.FilePointer;
     while (remainder > 0)
     {
         int readLen = (int) System.Math.Min(remainder, expectedBuffer.Length);
         expected.ReadBytes(expectedBuffer, 0, readLen);
         test.ReadBytes(testBuffer, 0, readLen);
         AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
         remainder -= readLen;
     }
 }
Example #32
 public override void  ReadInternal(byte[] b, int offset, int length)
 {
     SimOutage();
     delegate_Renamed.ReadBytes(b, offset, length);
 }
Example #33
 /// <summary>Read as a bit set </summary>
 private void  ReadBits(IndexInput input)
 {
     count = input.ReadInt();             // read count
     bits  = new byte[(size >> 3) + 1];   // allocate bits
     input.ReadBytes(bits, 0, bits.Length);
 }
Example #34
 /// <summary>
 /// Read as a bit set </summary>
 private void ReadBits(IndexInput input)
 {
     Count_Renamed = input.ReadInt(); // read count
     Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
     input.ReadBytes(Bits, 0, Bits.Length);
 }