Beispiel #1
0
        /// <summary>
        /// Reads the next term from <paramref name="input"/> into this buffer.
        /// The stream supplies a start offset and a length (two VInts), then
        /// <c>length</c> chars or bytes that are written into the buffer at
        /// <c>start</c>, and finally a VInt field number that is resolved to a
        /// field name through <paramref name="fieldInfos"/>.
        /// </summary>
        /// <param name="input">stream to read the term from</param>
        /// <param name="fieldInfos">used to map the stored field number to its name</param>
        /// <param name="state">passed through to every stream call</param>
        public void  Read(IndexInput input, FieldInfos fieldInfos, IState state)
        {
            // Any previously cached term no longer matches the buffer contents.
            this.term = null;

            int offset    = input.ReadVInt(state);
            int count     = input.ReadVInt(state);
            int newLength = offset + count;

            if (preUTF8Strings)
            {
                // Terms are stored as chars: read them straight into the text buffer.
                text.SetLength(newLength);
                input.ReadChars(text.result, offset, count, state);
            }
            else
            {
                bool wasDirty = dirty;

                if (wasDirty)
                {
                    // The byte buffer is stale, so rebuild it from the current text first.
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                }

                bytes.SetLength(newLength);
                input.ReadBytes(bytes.result, offset, count, state);

                if (wasDirty)
                {
                    // Convert the whole byte buffer back to chars.
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, newLength, text);
                    dirty = false;
                }
                else
                {
                    // Only the newly read bytes need converting.
                    UnicodeUtil.UTF8toUTF16(bytes.result, offset, count, text);
                }
            }

            int fieldNumber = input.ReadVInt(state);
            this.field = fieldInfos.FieldName(fieldNumber);
        }
Beispiel #2
0
        /// <summary>
        /// Reads one skip entry for <paramref name="level"/> from
        /// <paramref name="skipStream"/> and advances the freq and prox pointers of
        /// that level by the deltas stored in the entry.
        /// </summary>
        /// <param name="level">index into the per-level payload length and pointer arrays</param>
        /// <param name="skipStream">stream positioned at the skip entry</param>
        /// <param name="state">passed through to every stream call</param>
        /// <returns>the doc delta of the entry, with the low flag bit shifted off when the field stores payloads</returns>
        protected internal override int ReadSkipData(int level, IndexInput skipStream, IState state)
        {
            int docDelta = skipStream.ReadVInt(state);

            if (currentFieldStoresPayloads)
            {
                // With payloads, the low bit of the doc delta flags that a new
                // payload length follows (it differs from the previous one).
                bool payloadLengthFollows = (docDelta & 1) != 0;
                if (payloadLengthFollows)
                {
                    payloadLength[level] = skipStream.ReadVInt(state);
                }
                docDelta = Number.URShift(docDelta, 1);
            }

            freqPointer[level] += skipStream.ReadVInt(state);
            proxPointer[level] += skipStream.ReadVInt(state);

            return docDelta;
        }
Beispiel #3
0
        /// <summary>
        /// Loads document <paramref name="n"/> from the fields stream, letting
        /// <paramref name="fieldSelector"/> decide per field whether it is loaded,
        /// loaded lazily, recorded by size only, or skipped.
        /// </summary>
        /// <param name="n">document number passed to <c>SeekIndex</c></param>
        /// <param name="fieldSelector">selector consulted with each field name; when null every field is loaded</param>
        /// <param name="state">passed through to every stream call</param>
        /// <returns>the document built from the accepted fields</returns>
        public /*internal*/ Document Doc(int n, FieldSelector fieldSelector, IState state)
        {
            SeekIndex(n, state);
            long fieldsStart = indexStream.ReadLong(state);
            fieldsStream.Seek(fieldsStart, state);

            var result     = new Document();
            int fieldCount = fieldsStream.ReadVInt(state);

            for (int remaining = fieldCount; remaining > 0; remaining--)
            {
                int       number = fieldsStream.ReadVInt(state);
                FieldInfo info   = fieldInfos.FieldInfo(number);

                FieldSelectorResult decision;
                if (fieldSelector == null)
                {
                    decision = FieldSelectorResult.LOAD;
                }
                else
                {
                    decision = fieldSelector.Accept(info.name);
                }

                byte flags = fieldsStream.ReadByte(state);
                System.Diagnostics.Debug.Assert(flags <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool isCompressed = (flags & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                System.Diagnostics.Debug.Assert(
                    (!isCompressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
                    "compressed fields are only allowed in indexes of version <= 2.9");
                bool isTokenized = (flags & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool isBinary    = (flags & FieldsWriter.FIELD_IS_BINARY) != 0;

                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here.  See Lucene 762 for discussion
                if (decision.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(result, info, isBinary, isCompressed, isTokenized, state);
                    continue;
                }
                if (decision.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    // Load this field, then stop reading further fields.
                    AddField(result, info, isBinary, isCompressed, isTokenized, state);
                    break;
                }
                if (decision.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(result, info, isBinary, isCompressed, isTokenized, state);
                    continue;
                }
                if (decision.Equals(FieldSelectorResult.SIZE))
                {
                    // Record the size, then skip over the value using that size.
                    var fieldSize = AddFieldSize(result, info, isBinary, isCompressed, state);
                    SkipField(isBinary, isCompressed, fieldSize, state);
                    continue;
                }
                if (decision.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(result, info, isBinary, isCompressed, state);
                    break;
                }

                // Any other selector result: the field is not wanted.
                SkipField(isBinary, isCompressed, state);
            }

            return result;
        }
Beispiel #4
0
        /// <summary>
        /// Reads the field infos from <paramref name="input"/>: an optional negative
        /// format marker, the number of entries, and for each entry a name plus one
        /// flag byte that is decoded into the individual field options.
        /// </summary>
        /// <param name="input">stream to read from; it must be fully consumed by this method</param>
        /// <param name="fileName">only used in exception messages</param>
        /// <param name="state">passed through to every stream call</param>
        /// <exception cref="CorruptIndexException">
        /// if the format is neither <c>FORMAT_PRE</c> nor <c>FORMAT_START</c>, or if
        /// bytes remain in the stream after the last entry
        /// </exception>
        private void  Read(IndexInput input, String fileName, IState state)
        {
            int firstInt = input.ReadVInt(state);

            // A negative first value is an explicit format marker; otherwise the
            // first value is already the entry count of the pre-format layout.
            format = firstInt < 0 ? firstInt : FORMAT_PRE;

            if (format != FORMAT_PRE && format != FORMAT_START)
            {
                throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
            }

            int size = format == FORMAT_PRE ? firstInt : input.ReadVInt(state);

            for (int i = 0; i < size; i++)
            {
                String name            = StringHelper.Intern(input.ReadString(state));
                byte   bits            = input.ReadByte(state);
                bool   isIndexed       = (bits & IS_INDEXED) != 0;
                bool   storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                bool   storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                bool   storeOffsetWithTermVector    = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                bool   omitNorms                = (bits & OMIT_NORMS) != 0;
                bool   storePayloads            = (bits & STORE_PAYLOADS) != 0;
                bool   omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

                AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            }

            // Query position and length once so the check and the message agree.
            var bytesRead  = input.FilePointer(state);
            var totalBytes = input.Length(state);
            if (bytesRead != totalBytes)
            {
                throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + bytesRead + " vs size " + totalBytes);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Checks that disposing a clone of an input only closes that clone: the
        /// original input and a clone taken from the disposed clone beforehand must
        /// both still be readable, while the disposed clone must throw
        /// <c>AlreadyClosedException</c>.
        /// </summary>
        public virtual void TestCloneClose()
        {
            MMapDirectory dir    = new MMapDirectory(CreateTempDir("testCloneClose"));
            IndexOutput   output = dir.CreateOutput("bytes", NewIOContext(Random()));

            output.WriteVInt(5);
            output.Dispose();

            IndexInput original     = dir.OpenInput("bytes", IOContext.DEFAULT);
            IndexInput clone        = (IndexInput)original.Clone();
            IndexInput cloneOfClone = (IndexInput)clone.Clone();

            clone.Dispose();
            Assert.AreEqual(5, original.ReadVInt());

            bool threwAlreadyClosed = false;
            try
            {
                clone.ReadVInt();
            }
            catch (AlreadyClosedException)
            {
                // expected: the clone was disposed above
                threwAlreadyClosed = true;
            }
            if (!threwAlreadyClosed)
            {
                Assert.Fail("Must throw AlreadyClosedException");
            }

            Assert.AreEqual(5, cloneOfClone.ReadVInt());

            original.Dispose();
            cloneOfClone.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Reads the next position delta from the prox stream. When the current
        /// field stores payloads, the stored value carries a flag in its low bit:
        /// if set, a new payload length follows and is read as well.
        /// </summary>
        /// <param name="state">passed through to every stream call</param>
        /// <returns>the position delta, with the flag bit shifted off when payloads are stored</returns>
        private int ReadDeltaPosition(IState state)
        {
            int code = proxStream.ReadVInt(state);

            if (!currentFieldStoresPayloads)
            {
                return code;
            }

            // Low bit set: the payload length changed and is stored next.
            bool payloadLengthFollows = (code & 1) != 0;
            if (payloadLengthFollows)
            {
                payloadLength = proxStream.ReadVInt(state);
            }

            int positionDelta = Number.URShift(code, 1);
            needToLoadPayload = true;
            return positionDelta;
        }
        /// <summary>
        /// Advances the enumeration to the next term and reads its term info
        /// (doc freq, freq/prox pointer deltas, optional skip offset and, for an
        /// index, the index pointer delta).
        /// </summary>
        /// <param name="state">passed through to every stream call</param>
        /// <returns>true if a term was read; false once the enumeration is exhausted</returns>
        public override bool Next(IState state)
        {
            bool exhausted = position++ >= size - 1;

            // The current term always becomes the previous one, even at the end.
            prevBuffer.Set(termBuffer);

            if (exhausted)
            {
                termBuffer.Reset();
                return false;
            }

            termBuffer.Read(input, fieldInfos, state);

            termInfo.docFreq      = input.ReadVInt(state);
            termInfo.freqPointer += input.ReadVLong(state);
            termInfo.proxPointer += input.ReadVLong(state);

            // For format -1 the skip offset is only read to advance the file
            // pointer; the value is never used since skipTo is switched off.
            bool skipOffsetStored = format == -1
                ? !isIndex && termInfo.docFreq > formatM1SkipInterval
                : termInfo.docFreq >= skipInterval;

            if (skipOffsetStored)
            {
                termInfo.skipOffset = input.ReadVInt(state);
            }

            if (isIndex)
            {
                indexPointer += input.ReadVLong(state);
            }

            return true;
        }
Beispiel #8
0
        /// <summary>
        /// Opens the compound file <paramref name="name"/> in <paramref name="dir"/>
        /// and reads its directory. Each directory record is a long offset followed
        /// by a string id; an entry's length is the distance to the next entry's
        /// offset, or to the end of the stream for the last entry.
        /// </summary>
        /// <param name="dir">directory containing the compound file</param>
        /// <param name="name">name of the compound file</param>
        /// <param name="readBufferSize">buffer size handed to <c>OpenInput</c></param>
        /// <param name="state">passed through to every stream call</param>
        public CompoundFileReader(Directory dir, System.String name, int readBufferSize, IState state)
        {
            directory           = dir;
            fileName            = name;
            this.readBufferSize = readBufferSize;

            bool initialized = false;

            try
            {
                stream = dir.OpenInput(name, readBufferSize, state);

                int       entryCount = stream.ReadVInt(state);
                FileEntry last       = null;

                for (int i = 0; i < entryCount; i++)
                {
                    long          entryOffset = stream.ReadLong(state);
                    System.String entryId     = stream.ReadString(state);

                    // The previous entry ends where this one starts.
                    if (last != null)
                    {
                        last.length = entryOffset - last.offset;
                    }

                    last         = new FileEntry();
                    last.offset  = entryOffset;
                    entries[entryId] = last;
                }

                // The final entry runs to the end of the stream.
                if (last != null)
                {
                    last.length = stream.Length(state) - last.offset;
                }

                initialized = true;
            }
            finally
            {
                if (!initialized)
                {
                    if (stream != null)
                    {
                        try
                        {
                            stream.Close();
                        }
                        catch (System.IO.IOException)
                        {
                            // best effort: the exception already in flight takes precedence
                        }
                    }
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Advances to the next document that is not marked deleted, decoding its
        /// doc number and frequency from the freq stream.
        /// </summary>
        /// <param name="state">passed through to every stream call</param>
        /// <returns>true if positioned on a document; false once <c>count</c> has reached <c>df</c></returns>
        public virtual bool Next(IState state)
        {
            while (count != df)
            {
                int code = freqStream.ReadVInt(state);

                if (currentFieldOmitTermFreqAndPositions)
                {
                    // No frequencies stored: the code is the plain doc delta.
                    doc += code;
                    freq = 1;
                }
                else
                {
                    // Low bit set means freq is one; otherwise freq is stored next.
                    doc += Number.URShift(code, 1);
                    bool freqIsOne = (code & 1) != 0;
                    if (freqIsOne)
                    {
                        freq = 1;
                    }
                    else
                    {
                        freq = freqStream.ReadVInt(state);
                    }
                }

                count++;

                bool isLive = deletedDocs == null || !deletedDocs.Get(doc);
                if (isLive)
                {
                    return true;
                }

                SkippingDoc();
            }

            return false;
        }
Beispiel #10
0
        /// <summary>
        /// Reads the bits stored as a d-gaps list: after the size and count, each
        /// record is a VInt gap that is added to the previous byte index, followed
        /// by the byte value stored at that index.
        /// </summary>
        /// <param name="input">stream to read from</param>
        /// <param name="state">passed through to every stream call</param>
        private void  ReadDgaps(IndexInput input, IState state)
        {
            size  = input.ReadInt(state);
            count = input.ReadInt(state);
            bits  = new byte[(size >> 3) + 1];

            int byteIndex = 0;

            // Records are consumed until the BYTE_COUNTS values of the bytes read
            // add up to Count() (presumably the number of set bits - verify).
            for (int remaining = Count(); remaining > 0; remaining -= BYTE_COUNTS[bits[byteIndex] & 0xFF])
            {
                byteIndex      += input.ReadVInt(state);
                bits[byteIndex] = input.ReadByte(state);
            }
        }
        /// <summary>
        /// Checks the header of <paramref name="termsIn"/>, reads the maximum
        /// position count, initialises the wrapped postings reader and, unless the
        /// wrapped reader is itself a pulsing reader or the version predates
        /// <c>VERSION_META_ARRAY</c>, loads the per-field values from the segment's
        /// summary file into <c>fields</c>.
        /// </summary>
        /// <param name="termsIn">terms input whose header is validated</param>
        public override void Init(IndexInput termsIn)
        {
            version = CodecUtil.CheckHeader(termsIn, PulsingPostingsWriter.CODEC,
                PulsingPostingsWriter.VERSION_START,
                PulsingPostingsWriter.VERSION_CURRENT);

            maxPositions = termsIn.ReadVInt();
            _wrappedPostingsReader.Init(termsIn);

            bool skipSummary = _wrappedPostingsReader is PulsingPostingsReader
                               || version < PulsingPostingsWriter.VERSION_META_ARRAY;
            if (skipSummary)
            {
                fields = null;
                return;
            }

            fields = new SortedDictionary<int, int>();

            String summaryName = IndexFileNames.SegmentFileName(segmentState.SegmentInfo.Name,
                segmentState.SegmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);
            IndexInput summaryIn = null;

            try
            {
                summaryIn = segmentState.Directory.OpenInput(summaryName, segmentState.Context);
                CodecUtil.CheckHeader(summaryIn,
                    PulsingPostingsWriter.CODEC,
                    version,
                    PulsingPostingsWriter.VERSION_CURRENT);

                // The summary is a count followed by (field number, longs size) pairs.
                int pairCount = summaryIn.ReadVInt();
                for (int read = 0; read < pairCount; read++)
                {
                    int fieldNumber = summaryIn.ReadVInt();
                    int longsSize   = summaryIn.ReadVInt();
                    fields.Add(fieldNumber, longsSize);
                }
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(summaryIn);
            }
        }
 /// <summary>
 /// Sole constructor. Reads, for every block, a minimum value, an average
 /// (stored as int bits of a float) and the bits-per-value, then creates the
 /// packed reader for that block.
 /// </summary>
 /// <param name="in">input positioned at the first block</param>
 /// <param name="packedIntsVersion">version handed to the packed-ints readers</param>
 /// <param name="blockSize">number of values per block; validated by <c>PackedInts.CheckBlockSize</c></param>
 /// <param name="valueCount">total number of values</param>
 /// <param name="direct">when true, direct readers are created and the input is moved past each block's data</param>
 public MonotonicBlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
 {
     this.ValueCount = valueCount;
     BlockShift = PackedInts.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
     BlockMask = blockSize - 1;

     int blockCount = PackedInts.NumBlocks(valueCount, blockSize);
     MinValues = new long[blockCount];
     Averages = new float[blockCount];
     SubReaders = new PackedInts.Reader[blockCount];

     for (int block = 0; block < blockCount; ++block)
     {
         MinValues[block] = @in.ReadVLong();
         Averages[block] = Number.IntBitsToFloat(@in.ReadInt());

         int bitsPerValue = @in.ReadVInt();
         if (bitsPerValue > 64)
         {
             throw new Exception("Corrupted");
         }

         if (bitsPerValue == 0)
         {
             // Nothing is stored for this block.
             SubReaders[block] = new PackedInts.NullReader(blockSize);
             continue;
         }

         // The last block may hold fewer than blockSize values.
         int valuesInBlock = (int)Math.Min(blockSize, valueCount - (long)block * blockSize);

         if (!direct)
         {
             SubReaders[block] = PackedInts.GetReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, valuesInBlock, bitsPerValue);
             continue;
         }

         // Direct reader: remember where the block starts and seek past its
         // data explicitly after creating the reader.
         long blockStart = @in.FilePointer;
         SubReaders[block] = PackedInts.GetDirectReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, valuesInBlock, bitsPerValue);
         @in.Seek(blockStart + PackedInts.Format.PACKED.ByteCount(packedIntsVersion, valuesInBlock, bitsPerValue));
     }
 }
 /// <summary>
 /// Wraps <paramref name="in"/> and reads the block size from it as a VInt.
 /// </summary>
 /// <param name="in">input positioned at the stored block size</param>
 public FixedIntBlockIndexInput(IndexInput @in)
 {
     this.input = @in;

     int storedBlockSize = @in.ReadVInt();
     this.blockSize = storedBlockSize;
 }
 /// <summary>
 /// Reads field entries from <paramref name="meta"/> until a field number of
 /// -1 is found. Each entry starts with a type byte (NUMBER, BYTES or FST)
 /// and is stored in <c>numerics</c>, <c>binaries</c> or <c>fsts</c> under
 /// its field number.
 /// </summary>
 /// <param name="meta">metadata input to read from</param>
 /// <param name="infos">not used by this method</param>
 /// <exception cref="CorruptIndexException">if a numeric format or an entry type is not recognised</exception>
 private void ReadFields(IndexInput meta, FieldInfos infos)
 {
     for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
     {
         int fieldType = meta.ReadByte();

         if (fieldType == NUMBER)
         {
             var numeric = new NumericEntry();
             numeric.offset = meta.ReadLong();
             numeric.missingOffset = meta.ReadLong();
             // The missing-bytes value is only stored when a missing offset is present.
             numeric.missingBytes = numeric.missingOffset != -1 ? meta.ReadLong() : 0;
             numeric.format = meta.ReadByte();

             switch (numeric.format)
             {
                 case DELTA_COMPRESSED:
                 case TABLE_COMPRESSED:
                 case GCD_COMPRESSED:
                 case UNCOMPRESSED:
                     break;
                 default:
                     throw new CorruptIndexException("Unknown format: " + numeric.format + ", input=" + meta);
             }

             if (numeric.format != UNCOMPRESSED)
             {
                 numeric.packedIntsVersion = meta.ReadVInt();
             }

             numerics[fieldNumber] = numeric;
         }
         else if (fieldType == BYTES)
         {
             var binary = new BinaryEntry();
             binary.offset = meta.ReadLong();
             binary.numBytes = meta.ReadLong();
             binary.missingOffset = meta.ReadLong();
             binary.missingBytes = binary.missingOffset != -1 ? meta.ReadLong() : 0;
             binary.minLength = meta.ReadVInt();
             binary.maxLength = meta.ReadVInt();

             // Variable-length values carry two extra VInts.
             if (binary.minLength != binary.maxLength)
             {
                 binary.packedIntsVersion = meta.ReadVInt();
                 binary.blockSize = meta.ReadVInt();
             }

             binaries[fieldNumber] = binary;
         }
         else if (fieldType == FST)
         {
             var fst = new FSTEntry();
             fst.offset = meta.ReadLong();
             fst.numOrds = meta.ReadVLong();
             fsts[fieldNumber] = fst;
         }
         else
         {
             throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
         }
     }
 }
Beispiel #15
0
 /// <summary>
 /// Reads a VInt from the wrapped input after checking that this input is
 /// still open.
 /// </summary>
 /// <returns>the value returned by the delegate's <c>ReadVInt</c></returns>
 public override int ReadVInt()
 {
     EnsureOpen();

     int value = @delegate.ReadVInt();
     return value;
 }
        /// <summary>
        /// Opens the terms index file of <paramref name="segment"/>, validates its
        /// header (and checksum for versions that have one), and reads the field
        /// directory: a field count followed by a (field number, index start)
        /// pair per field.
        /// </summary>
        /// <param name="dir">directory containing the terms index file</param>
        /// <param name="fieldInfos">used to resolve stored field numbers</param>
        /// <param name="segment">segment name used to build the file name</param>
        /// <param name="indexDivisor">must be -1 or greater than 0; when greater than 0 the input is closed before returning</param>
        /// <param name="segmentSuffix">segment suffix used to build the file name</param>
        /// <param name="context">IO context wrapped for opening the file</param>
        /// <exception cref="CorruptIndexException">if the field count is negative or a field occurs twice</exception>
        public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
            String segmentSuffix, IOContext context)
        {
            input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                        VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
            this.segment = segment;
            bool success = false;

            System.Diagnostics.Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            try
            {
                version = readHeader(input);
                this.indexDivisor = indexDivisor;

                if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(input);
                }

                SeekDir(input, dirOffset);

                // Read directory
                int numFields = input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")");
                }

                for (int i = 0; i < numFields; i++)
                {
                    int field = input.ReadVInt();
                    long indexStart = input.ReadVLong();
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    // NOTE(review): assumes `fields` is a dictionary keyed by FieldInfo and
                    // that FieldInfo exposes `Name` - confirm against the declarations.
                    // Store first, then fail on a duplicate, as a map "put" would.
                    bool duplicate = fields.ContainsKey(fieldInfo);
                    fields[fieldInfo] = new FieldIndexData(fieldInfo, indexStart);
                    if (duplicate)
                    {
                        throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")");
                    }
                }
                success = true;
            }
            finally
            {
                if (indexDivisor > 0)
                {
                    // The input is closed here whether or not reading succeeded.
                    input.Dispose();
                    input = null;
                    if (success)
                    {
                        indexLoaded = true;
                    }
                }
            }
        }
        /// <summary>
        /// Reads field entries from <paramref name="meta"/> until a field number of
        /// -1 is found. Each entry starts with a type byte (NUMBER, BYTES or FST)
        /// and is stored in <c>Numerics</c>, <c>Binaries</c> or <c>Fsts</c> under
        /// its field number.
        /// </summary>
        /// <param name="meta">metadata input to read from</param>
        /// <param name="infos">not used by this method</param>
        /// <exception cref="CorruptIndexException">
        /// if a field number is negative (other than the -1 terminator), or a
        /// numeric format or an entry type is not recognised
        /// </exception>
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
            {
                // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
                // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
                if (fieldNumber < 0)
                {
                    // trickier to validate more: because we re-use for norms, because we use multiple entries
                    // for "composite" types like sortedset, etc.
                    throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
                }

                int fieldType = meta.ReadByte();

                if (fieldType == NUMBER)
                {
                    var numeric = new NumericEntry();
                    numeric.Offset = meta.ReadLong();
                    numeric.Format = (sbyte)meta.ReadByte();

                    switch (numeric.Format)
                    {
                        case DELTA_COMPRESSED:
                        case TABLE_COMPRESSED:
                        case GCD_COMPRESSED:
                        case UNCOMPRESSED:
                            break;

                        default:
                            throw new CorruptIndexException("Unknown format: " + numeric.Format + ", input=" + meta);
                    }

                    if (numeric.Format != UNCOMPRESSED)
                    {
                        numeric.PackedIntsVersion = meta.ReadVInt();
                    }

                    Numerics[fieldNumber] = numeric;
                }
                else if (fieldType == BYTES)
                {
                    var binary = new BinaryEntry
                    {
                        Offset = meta.ReadLong(),
                        NumBytes = meta.ReadLong(),
                        MinLength = meta.ReadVInt(),
                        MaxLength = meta.ReadVInt()
                    };

                    // Variable-length values carry two extra VInts.
                    if (binary.MinLength != binary.MaxLength)
                    {
                        binary.PackedIntsVersion = meta.ReadVInt();
                        binary.BlockSize = meta.ReadVInt();
                    }

                    Binaries[fieldNumber] = binary;
                }
                else if (fieldType == FST)
                {
                    var fst = new FSTEntry
                    {
                        Offset = meta.ReadLong(),
                        NumOrds = meta.ReadVLong()
                    };
                    Fsts[fieldNumber] = fst;
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
                }
            }
        }
Beispiel #18
0
        /// <summary>
        /// Reads the directory of a legacy compound file. Each record is a long
        /// offset followed by a string id; an entry's length is the distance to the
        /// next entry's offset, or to the end of the stream for the last entry.
        /// </summary>
        /// <param name="stream">input positioned just after <paramref name="firstInt"/></param>
        /// <param name="firstInt">
        /// first value already read from the stream: either a format marker (below
        /// <c>FORMAT_PRE_VERSION</c>), in which case the count follows in the
        /// stream, or the entry count itself
        /// </param>
        /// <returns>the entries keyed by file id</returns>
        /// <exception cref="CorruptIndexException">
        /// if the format is older than <c>FORMAT_NO_SEGMENT_PREFIX</c>, an offset
        /// lies outside the stream, or an id occurs more than once
        /// </exception>
        private static IDictionary <string, FileEntry> ReadLegacyEntries(IndexInput stream, int firstInt)
        {
            IDictionary <string, FileEntry> entries = new Dictionary <string, FileEntry>();
            int  count;
            bool stripSegmentName;

            if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION)
            {
                if (firstInt < CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX)
                {
                    throw new CorruptIndexException("Incompatible format version: " + firstInt + " expected >= " + CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX + " (resource: " + stream + ")");
                }
                // It's a post-3.1 index, read the count.
                count            = stream.ReadVInt();
                stripSegmentName = false;
            }
            else
            {
                count            = firstInt;
                stripSegmentName = true;
            }

            // read the directory and init files
            long      streamLength = stream.Length();
            FileEntry entry        = null;

            for (int i = 0; i < count; i++)
            {
                long offset = stream.ReadLong();
                if (offset < 0 || offset > streamLength)
                {
                    throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
                }
                string id = stream.ReadString();

                if (stripSegmentName)
                {
                    // Fix the id to not include the segment names. this is relevant for
                    // pre-3.1 indexes.
                    id = IndexFileNames.StripSegmentName(id);
                }

                if (entry != null)
                {
                    // set length of the previous entry
                    entry.Length = offset - entry.Offset;
                }

                entry        = new FileEntry();
                entry.Offset = offset;

                // A chained assignment would yield the new entry itself, never the
                // previously stored one, so duplicates must be detected before storing.
                if (entries.ContainsKey(id))
                {
                    throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + stream);
                }
                entries[id] = entry;
            }

            // set the length of the final entry
            if (entry != null)
            {
                entry.Length = streamLength - entry.Offset;
            }

            return(entries);
        }
Beispiel #19
0
        /// <summary>
        /// Reads the term vector stored at <paramref name="tvfPointer"/> in the tvf
        /// stream and reports it to <paramref name="mapper"/>: first the
        /// expectations (term count and whether offsets/positions are stored),
        /// then one <c>Map</c> call per term.
        /// </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <param name="state">Passed through to every tvf stream call
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper, IState state)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer, state);

            int numTerms = tvf.ReadVInt(state);

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (format >= FORMAT_VERSION)
            {
                // One flag byte tells whether positions and/or offsets are stored.
                byte bits = tvf.ReadByte(state);
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                // Older format: a VInt is read and discarded; neither positions
                // nor offsets are stored.
                tvf.ReadVInt(state);
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            // Only one of the two buffers is used, depending on how terms are stored.
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                // Each term is stored as: how many leading elements of the buffer
                // to keep (start), then how many new elements follow (deltaLength).
                start       = tvf.ReadVInt(state);
                deltaLength = tvf.ReadVInt(state);
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        // Grow to 1.5x the needed length, keeping the first `start` chars
                        // that this term reuses from the buffer.
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength, state);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        // Grow to 1.5x the needed length, keeping the first `start` bytes
                        // that this term reuses from the buffer.
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength, state);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt(state);
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions == false)
                    {
                        // Positions are stored as deltas from the previous position.
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt(state);
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt(state);
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets == false)
                    {
                        // Start offsets are deltas from the previous end offset;
                        // end offsets are deltas from their own start offset.
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt(state);
                            int endOffset   = startOffset + tvf.ReadVInt(state);
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        // Skip the two VInts stored per occurrence.
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt(state);
                            tvf.ReadVInt(state);
                        }
                    }
                }
                // offsets and/or positions stay null when not stored or ignored by the mapper.
                mapper.Map(term, freq, offsets, positions);
            }
        }
Beispiel #20
0
 /// <summary>
 /// Moves <paramref name="in"/> past one encoded block without decoding its values.
 /// </summary>
 /// <remarks>
 /// A block begins with a one-byte header. If that header equals <c>ALL_VALUES_EQUAL</c>,
 /// a single VInt follows and is consumed. Otherwise the header is used as an index into
 /// <c>EncodedSizes</c> to obtain the number of bytes to seek over.
 /// </remarks>
 /// <param name="in"> the input positioned at the start of the block to skip </param>
 /// <exception cref="IOException"> If there is a low-level I/O error </exception>
 public void SkipBlock(IndexInput @in)
 {
     int bitsPerValue = @in.ReadByte();
     if (bitsPerValue != ALL_VALUES_EQUAL)
     {
         Debug.Assert(bitsPerValue > 0 && bitsPerValue <= 32, bitsPerValue.ToString());
         // Look up the payload size first, then jump over it relative to the current position.
         int bytesToSkip = EncodedSizes[bitsPerValue];
         @in.Seek(@in.FilePointer + bytesToSkip);
     }
     else
     {
         // The whole block is represented by one VInt; read it and discard the value.
         @in.ReadVInt();
     }
 }
 /// <summary>
 /// Fills <paramref name="docBuffer"/> (and, when <paramref name="indexHasFreq"/> is set,
 /// <paramref name="freqBuffer"/>) with <paramref name="num"/> entries that were written as
 /// variable-length ints rather than bit-packed.
 /// </summary>
 /// <remarks>
 /// With frequencies, each entry is one VInt whose upper bits (value shifted right by one,
 /// unsigned) go to <paramref name="docBuffer"/>. A set low bit means the frequency is 1;
 /// a clear low bit means the frequency follows as a separate VInt.
 /// Without frequencies, each entry is a single VInt stored as-is.
 /// </remarks>
 internal static void ReadVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, bool indexHasFreq)
 {
     if (!indexHasFreq)
     {
         // Plain layout: one VInt per slot, nothing is written to freqBuffer.
         for (int slot = 0; slot < num; slot++)
         {
             docBuffer[slot] = docIn.ReadVInt();
         }
         return;
     }

     for (int slot = 0; slot < num; slot++)
     {
         int packed = docIn.ReadVInt();
         docBuffer[slot] = (int)((uint)packed >> 1);

         // Low bit set => frequency of exactly one, otherwise it is stored explicitly.
         bool freqIsOne = (packed & 1) != 0;
         freqBuffer[slot] = freqIsOne ? 1 : docIn.ReadVInt();
     }
 }
        /// <summary>
        /// Sole constructor. Verifies the vectors index file and loads it through a
        /// <c>CompressingStoredFieldsIndexReader</c>, then opens the vectors data file and reads
        /// the packed-ints version and chunk size from it.
        /// </summary>
        /// <remarks>
        /// If construction does not run to completion, this instance and the index stream (when
        /// still open) are handed to <c>IOUtils.CloseWhileHandlingException</c> before the
        /// original exception propagates.
        /// </remarks>
        public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.compressionMode = compressionMode;
            string segmentName = si.Name;
            fieldInfos = fn;
            numDocs = si.DocCount;

            bool initialized = false;
            ChecksumIndexInput indexInput = null;
            try
            {
                // ---- Index file: validate the header, load the index, validate the tail ----
                string indexFileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
                string indexCodecName = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
                indexInput = d.OpenChecksumInput(indexFileName, context);
                version = CodecUtil.CheckHeader(indexInput, indexCodecName, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(indexCodecName) == indexInput.FilePointer);
                indexReader = new CompressingStoredFieldsIndexReader(indexInput, si);

                if (version < CompressingTermVectorsWriter.VERSION_CHECKSUM)
                {
                    // Versions without a checksum footer must simply end here.
                    CodecUtil.CheckEOF(indexInput);
                }
                else
                {
                    indexInput.ReadVLong(); // the end of the data file
                    CodecUtil.CheckFooter(indexInput);
                }

                // The index is fully consumed; release it and clear the local so the
                // failure path below does not try to close it a second time.
                indexInput.Dispose();
                indexInput = null;

                // ---- Data file: validate the header and read the leading metadata ----
                string dataFileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
                string dataCodecName = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
                vectorsStream = d.OpenInput(dataFileName, context);
                int dataVersion = CodecUtil.CheckHeader(vectorsStream, dataCodecName, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
                if (version != dataVersion)
                {
                    throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + dataVersion);
                }
                Debug.Assert(CodecUtil.HeaderLength(dataCodecName) == vectorsStream.FilePointer);

                packedIntsVersion = vectorsStream.ReadVInt();
                chunkSize = vectorsStream.ReadVInt();
                decompressor = compressionMode.NewDecompressor();
                this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

                initialized = true;
            }
            finally
            {
                if (!initialized)
                {
                    IOUtils.CloseWhileHandlingException(this, indexInput);
                }
            }
        }
        /// <summary>
        /// Reads the entry table of a legacy compound file directly from
        /// <paramref name="stream"/> and returns it keyed by entry id.
        /// </summary>
        /// <remarks>
        /// Each table row is a <c>long</c> offset followed by a string id. An entry's length is
        /// not stored; it is derived as the distance to the next entry's offset, or to the end
        /// of the stream for the final entry.
        /// </remarks>
        /// <param name="stream"> the compound file input, positioned just after the first VInt </param>
        /// <param name="firstInt"> the first VInt already read from <paramref name="stream"/>;
        /// either a format marker or, for the oldest layout, the entry count itself </param>
        /// <returns> a map from entry id to its <c>FileEntry</c> (offset and length) </returns>
        /// <exception cref="CorruptIndexException"> if the format version is too old, an offset
        /// lies outside the stream, or the same id appears twice </exception>
        private static IDictionary<string, FileEntry> ReadLegacyEntries(IndexInput stream, int firstInt)
        {
            IDictionary<string, FileEntry> entries = new Dictionary<string, FileEntry>();
            int count;
            bool stripSegmentName;
            if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION)
            {
                if (firstInt < CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX)
                {
                    throw new CorruptIndexException("Incompatible format version: " + firstInt + " expected >= " + CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX + " (resource: " + stream + ")");
                }
                // It's a post-3.1 index, read the count.
                count = stream.ReadVInt();
                stripSegmentName = false;
            }
            else
            {
                // No version marker: the value that was read is already the entry count.
                count = firstInt;
                stripSegmentName = true;
            }

            // read the directory and init files
            long streamLength = stream.Length();
            FileEntry entry = null;
            for (int i = 0; i < count; i++)
            {
                long offset = stream.ReadLong();
                if (offset < 0 || offset > streamLength)
                {
                    throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
                }
                string id = stream.ReadString();

                if (stripSegmentName)
                {
                    // Fix the id to not include the segment names. this is relevant for
                    // pre-3.1 indexes.
                    id = IndexFileNames.StripSegmentName(id);
                }

                if (entry != null)
                {
                    // set length of the previous entry
                    entry.Length = offset - entry.Offset;
                }

                // Detect duplicates BEFORE inserting. The previous form,
                // "previous = entries[id] = entry", always produced the entry just assigned
                // (a C# assignment expression evaluates to the assigned value), so the
                // duplicate check fired on every entry, including the very first one.
                if (entries.ContainsKey(id))
                {
                    throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + stream);
                }

                entry = new FileEntry();
                entry.Offset = offset;
                entries[id] = entry;
            }

            // set the length of the final entry
            if (entry != null)
            {
                entry.Length = streamLength - entry.Offset;
            }

            return entries;
        }
 /// <summary>
 /// Reads per-field entries from <paramref name="meta"/> until a field number of
 /// <c>-1</c> is encountered.
 /// </summary>
 /// <remarks>
 /// Every record is a VInt field number followed by a one-byte type tag. The tag selects
 /// which entry reader is invoked and which table (<c>numerics</c>, <c>binaries</c>,
 /// <c>sorteds</c> or <c>sortedSets</c>) receives the result under that field number.
 /// </remarks>
 /// <param name="meta"> the metadata input to read from </param>
 /// <exception cref="CorruptIndexException"> if a type tag matches none of the known types </exception>
 private void ReadFields(IndexInput meta)
 {
     for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
     {
         int typeTag = meta.ReadByte();

         if (typeTag == NUMBER)
         {
             numerics[fieldNumber] = ReadNumericEntry(meta);
             continue;
         }

         if (typeTag == BYTES)
         {
             binaries[fieldNumber] = ReadBinaryEntry(meta);
             continue;
         }

         if (typeTag == SORTED)
         {
             sorteds[fieldNumber] = ReadSortedEntry(meta);
             continue;
         }

         if (typeTag == SORTED_SET)
         {
             sortedSets[fieldNumber] = ReadSortedSetEntry(meta);
             continue;
         }

         // None of the known tags matched.
         throw new CorruptIndexException("invalid entry type: " + typeTag + ", input=" + meta);
     }
 }
Beispiel #25
0
        /// <summary>
        /// Reads one block from <paramref name="in"/> and writes its values to
        /// <paramref name="decoded"/>.
        /// </summary>
        /// <remarks>
        /// The block starts with a one-byte header. If it equals <c>ALL_VALUES_EQUAL</c>, a
        /// single VInt follows and the first <c>Lucene41PostingsFormat.BLOCK_SIZE</c> slots of
        /// <paramref name="decoded"/> are filled with it. Otherwise the header selects the
        /// encoded size, decoder and iteration count used to unpack the block.
        /// </remarks>
        /// <param name="in">        the input to use to read data </param>
        /// <param name="encoded">   scratch buffer that receives the raw encoded bytes </param>
        /// <param name="decoded">   where to write decoded data </param>
        /// <exception cref="IOException"> If there is a low-level I/O error </exception>
        public void ReadBlock(IndexInput @in, sbyte[] encoded, int[] decoded)
        {
            int bitsPerValue = @in.ReadByte();
            Debug.Assert(bitsPerValue <= 32, bitsPerValue.ToString());

            if (bitsPerValue != ALL_VALUES_EQUAL)
            {
                // Pull the raw bytes for this block into the scratch buffer.
                int byteCount = EncodedSizes[bitsPerValue];
                @in.ReadBytes(encoded, 0, byteCount);

                // Select the decoder and iteration count registered for this header value.
                PackedInts.Decoder blockDecoder = Decoders[bitsPerValue];
                int iterationCount = Iterations[bitsPerValue];
                Debug.Assert(iterationCount * blockDecoder.ByteValueCount() >= Lucene41PostingsFormat.BLOCK_SIZE);

                blockDecoder.Decode(encoded, 0, decoded, 0, iterationCount);
            }
            else
            {
                // Every value in the block is the same; it is stored once as a VInt.
                int repeated = @in.ReadVInt();
                CollectionsHelper.Fill(decoded, 0, Lucene41PostingsFormat.BLOCK_SIZE, repeated);
            }
        }
Beispiel #26
0
        /// <summary>
        /// Helper method that reads CFS entries from an input stream.
        /// </summary>
        /// <remarks>
        /// Two layouts are handled. When the first VInt of the compound file equals
        /// <c>CODEC_MAGIC_BYTE1</c>, the remaining magic bytes and codec header are verified and
        /// the entry table is read from a separate entries file opened from
        /// <paramref name="dir"/>. Otherwise the table is read from the compound file itself by
        /// <c>ReadLegacyEntries</c>.
        /// </remarks>
        /// <param name="handle"> slicer whose <c>OpenFullSlice()</c> yields the compound file input </param>
        /// <param name="dir"> directory from which the separate entries file is opened </param>
        /// <param name="name"> compound file name; its extension is stripped to derive the entries file name </param>
        /// <returns> a map from entry id to a <c>FileEntry</c> </returns>
        /// <exception cref="CorruptIndexException"> if the magic bytes do not match or an entry id appears twice </exception>
        private static IDictionary <string, FileEntry> ReadEntries(IndexInputSlicer handle, Directory dir, string name)
        {
            // priorE records an IOException raised inside the try block so that the finally
            // block can pass it to IOUtils.CloseWhileHandlingException together with the streams.
            System.IO.IOException priorE        = null;
            IndexInput            stream        = null;
            ChecksumIndexInput    entriesStream = null;

            // read the first VInt. If it is negative, it's the version number
            // otherwise it's the count (pre-3.1 indexes)
            try
            {
                IDictionary <string, FileEntry> mapping;
                stream = handle.OpenFullSlice();
                int firstInt = stream.ReadVInt();
                // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
                // and separate norms/etc are outside of cfs.
                if (firstInt == CODEC_MAGIC_BYTE1)
                {
                    // The first byte matched the codec magic; the next three bytes must match too.
                    byte secondByte = stream.ReadByte();
                    byte thirdByte  = stream.ReadByte();
                    byte fourthByte = stream.ReadByte();
                    if (secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4)
                    {
                        throw new CorruptIndexException("Illegal/impossible header for CFS file: " + secondByte + "," + thirdByte + "," + fourthByte);
                    }
                    // The magic has already been consumed above, hence the "NoMagic" header check.
                    int    version         = CodecUtil.CheckHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
                    // The entry table lives in a separate file named after the compound file.
                    string entriesFileName = IndexFileNames.SegmentFileName(IndexFileNames.StripExtension(name), "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
                    entriesStream = dir.OpenChecksumInput(entriesFileName, IOContext.READONCE);
                    CodecUtil.CheckHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
                    int numEntries = entriesStream.ReadVInt();
                    mapping = new Dictionary <string, FileEntry>(numEntries);
                    // Each record is: string id, long offset, long length.
                    for (int i = 0; i < numEntries; i++)
                    {
                        FileEntry fileEntry = new FileEntry();
                        string    id        = entriesStream.ReadString();

                        //If the key was already present
                        if (mapping.ContainsKey(id))
                        {
                            throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + entriesStream);
                        }
                        else
                        {
                            mapping[id] = fileEntry;
                        }
                        // The entry is registered first; its offset and length are filled in afterwards.
                        fileEntry.Offset = entriesStream.ReadLong();
                        fileEntry.Length = entriesStream.ReadLong();
                    }
                    // Files at or above VERSION_CHECKSUM get a footer check; older ones only an EOF check.
                    if (version >= CompoundFileWriter.VERSION_CHECKSUM)
                    {
                        CodecUtil.CheckFooter(entriesStream);
                    }
                    else
                    {
                        CodecUtil.CheckEOF(entriesStream);
                    }
                }
                else
                {
                    // TODO remove once 3.x is not supported anymore
                    mapping = ReadLegacyEntries(stream, firstInt);
                }
                return(mapping);
            }
            catch (System.IO.IOException ioe)
            {
                // Do not rethrow here; the finally block receives the exception via priorE.
                priorE = ioe;
            }
            finally
            {
                // Runs on success and on failure, so both streams are always handed over.
                // NOTE(review): presumably this closes the streams and rethrows priorE when it
                // is non-null - confirm against IOUtils.
                IOUtils.CloseWhileHandlingException(priorE, stream, entriesStream);
            }
            // this is needed until Java 7's real try-with-resources:
            // (the compiler cannot see that the finally block never completes normally after an
            // IOException, so an explicit throw is required on this path)
            throw new InvalidOperationException("impossible to get here");
        }
 /// <summary>
 /// Verifies that <paramref name="termsIn"/> was produced by the matching postings writer.
 /// </summary>
 /// <remarks>
 /// Checks the codec header, then reads the block size as a VInt and compares it with
 /// <c>Lucene41PostingsFormat.BLOCK_SIZE</c>.
 /// </remarks>
 /// <param name="termsIn"> the terms input, positioned at the postings header </param>
 /// <exception cref="InvalidOperationException"> if the block size that was read differs
 /// from <c>Lucene41PostingsFormat.BLOCK_SIZE</c> </exception>
 public override void Init(IndexInput termsIn)
 {
     CodecUtil.CheckHeader(
         termsIn,
         Lucene41PostingsWriter.TERMS_CODEC,
         Lucene41PostingsWriter.VERSION_START,
         Lucene41PostingsWriter.VERSION_CURRENT);

     int writtenBlockSize = termsIn.ReadVInt();
     if (writtenBlockSize == Lucene41PostingsFormat.BLOCK_SIZE)
     {
         return;
     }

     throw new InvalidOperationException("index-time BLOCK_SIZE (" + writtenBlockSize + ") != read-time BLOCK_SIZE (" + Lucene41PostingsFormat.BLOCK_SIZE + ")");
 }
Beispiel #28
0
        /// <summary>
        /// Looks up the term vector of <paramref name="field"/> in document
        /// <paramref name="docNum"/> and, if present, feeds it to <paramref name="mapper"/>.
        /// </summary>
        /// <remarks>
        /// Does nothing when there is no tvx input or when the document has no entry for the
        /// field. All field numbers of the document are always read, because the tvf pointers
        /// that follow them can only be reached by reading past them.
        /// </remarks>
        public virtual void Get(int docNum, System.String field, TermVectorMapper mapper, IState state)
        {
            if (tvx == null)
            {
                // No tvx file: nothing to look up.
                return;
            }

            int wantedFieldNumber = fieldInfos.FieldNumber(field);

            // SeekTvx accounts for the FORMAT_SIZE when positioning in the tvx. Other seeks
            // do not need this because they use a file pointer that was written in another file.
            SeekTvx(docNum, state);
            long tvdPosition = tvx.ReadLong(state);

            tvd.Seek(tvdPosition, state);
            int fieldCount = tvd.ReadVInt(state);

            // There are only a few fields per document, so do a full scan rather than
            // requiring them to be ordered.
            int currentNumber = 0;
            int matchIndex = -1;
            for (int i = 0; i < fieldCount; i++)
            {
                // From FORMAT_VERSION on, field numbers are stored as-is; before that they
                // are stored as increments on the previous number.
                int raw = tvd.ReadVInt(state);
                currentNumber = format >= FORMAT_VERSION ? raw : currentNumber + raw;

                if (currentNumber == wantedFieldNumber)
                {
                    matchIndex = i;
                }
            }

            if (matchIndex == -1)
            {
                // The field, although valid in the segment, was not found in this document.
                return;
            }

            // Compute position in the tvf file: a base pointer plus one VLong per
            // field up to and including the matching one.
            long tvfPosition = format >= FORMAT_VERSION2 ? tvx.ReadLong(state) : tvd.ReadVLong(state);
            for (int remaining = matchIndex; remaining > 0; remaining--)
            {
                tvfPosition += tvd.ReadVLong(state);
            }

            mapper.SetDocumentNumber(docNum);
            ReadTermVector(field, tvfPosition, mapper, state);
        }