Example #1
File: TermBuffer.cs Project: sinsay/SSE
        public void Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null; // invalidate cache
            int start = input.ReadVInt();
            int length = input.ReadVInt();
            int totalLength = start + length;
            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {

                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
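
Every example on this page ultimately leans on IndexInput.ReadVInt(). As a point of reference, here is a minimal sketch of how a Lucene-style VInt would be decoded (7 data bits per byte, high bit as a continuation flag); the readByte delegate is a stand-in for the real stream, not part of the Lucene API.

        using System;

        static class VIntSketch
        {
            // Minimal sketch: decode a Lucene-style variable-length int.
            // Each byte contributes its low 7 bits; a set high bit means
            // another byte follows, so small values cost a single byte.
            public static int ReadVInt(Func<byte> readByte)
            {
                byte b = readByte();
                int value = b & 0x7F;
                for (int shift = 7; (b & 0x80) != 0; shift += 7)
                {
                    b = readByte();
                    value |= (b & 0x7F) << shift;
                }
                return value;
            }
        }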
Example #2
 public int NextPosition()
 {
     // perform lazy skips if necessary
     LazySkip();
     proxCount--;
     return(position += proxStream.ReadVInt());
 }
        protected internal override int ReadSkipData(int level, IndexInput skipStream)
        {
            int delta;

            if (currentFieldStoresPayloads)
            {
                // the current field stores payloads.
                // if the doc delta is odd then we have
                // to read the current payload length
                // because it differs from the length of the
                // previous payload
                delta = skipStream.ReadVInt();
                if ((delta & 1) != 0)
                {
                    payloadLength[level] = skipStream.ReadVInt();
                }
                delta = Number.URShift(delta, 1);
            }
            else
            {
                delta = skipStream.ReadVInt();
            }
            freqPointer[level] += skipStream.ReadVInt();
            proxPointer[level] += skipStream.ReadVInt();

            return(delta);
        }
Example #4
        public void  Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null;             // invalidate cache
            int start       = input.ReadVInt();
            int length      = input.ReadVInt();
            int totalLength = start + length;

            if (preUTF8Strings)
            {
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
        public virtual bool Next()
        {
            while (true)
            {
                if (count == df)
                {
                    return(false);
                }

                int docCode = freqStream.ReadVInt();
                doc += (int)(((uint)docCode) >> 1);                   // shift off low bit
                if ((docCode & 1) != 0)
                {
                    // if low bit is set
                    freq = 1;
                }
                // freq is one
                else
                {
                    freq = freqStream.ReadVInt();                     // else read freq
                }
                count++;

                if (deletedDocs == null || !deletedDocs.Get(doc))
                {
                    break;
                }
                SkippingDoc();
            }
            return(true);
        }
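
The shift-and-test in Next() above inverts a simple writer-side trick: the doc delta is shifted left one bit and the low bit records whether freq is exactly 1, so the common single-occurrence case needs no extra byte. A hedged sketch of that encoding follows; the WriteDocAndFreq helper and the writeVInt delegate are illustrative, not Lucene API.

        using System;

        static class DocFreqSketch
        {
            // Hypothetical writer-side counterpart of the Next() loop above.
            public static void WriteDocAndFreq(Action<int> writeVInt, int docDelta, int freq)
            {
                if (freq == 1)
                {
                    writeVInt((docDelta << 1) | 1); // low bit set: freq is implicitly 1
                }
                else
                {
                    writeVInt(docDelta << 1);       // low bit clear: an explicit freq follows
                    writeVInt(freq);
                }
            }
        }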
Example #6
        public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
        {
            SeekIndex(n);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int                 fieldNumber = fieldsStream.ReadVInt();
                FieldInfo           fi          = fieldInfos.FieldInfo(fieldNumber);
                FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

                byte bits = fieldsStream.ReadByte();
                System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool binary     = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here.  See Lucene 762 for discussion
                if (acceptField.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
                {
                    AddFieldForMerge(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                    break;                     //Get out of this loop
                }
                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE))
                {
                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(doc, fi, binary, compressed);
                    break;
                }
                else
                {
                    SkipField(binary, compressed);
                }
            }

            return(doc);
        }
Example #7
		public void  Read(IndexInput input, FieldInfos fieldInfos)
		{
			this.term = null; // invalidate cache
			int start = input.ReadVInt();
			int length = input.ReadVInt();
			int totalLength = start + length;
			SetTextLength(totalLength);
			input.ReadChars(this.text, start, length);
			this.field = fieldInfos.FieldName(input.ReadVInt());
		}
Example #8
        public void  Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null;             // invalidate cache
            int start       = input.ReadVInt();
            int length      = input.ReadVInt();
            int totalLength = start + length;

            SetTextLength(totalLength);
            input.ReadChars(this.text, start, length);
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }
Example #9
        private void  Read(IndexInput input, System.String fileName)
        {
            int firstInt = input.ReadVInt();

            if (firstInt < 0)
            {
                // This is a real format
                format = firstInt;
            }
            else
            {
                format = FORMAT_PRE;
            }

            if (format != FORMAT_PRE && format != FORMAT_START)
            {
                throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
            }

            int size;

            if (format == FORMAT_PRE)
            {
                size = firstInt;
            }
            else
            {
                size = input.ReadVInt();                 //read in the size
            }

            for (int i = 0; i < size; i++)
            {
                System.String name            = StringHelper.Intern(input.ReadString());
                byte          bits            = input.ReadByte();
                bool          isIndexed       = (bits & IS_INDEXED) != 0;
                bool          storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                bool          storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                bool          storeOffsetWithTermVector    = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                bool          omitNorms                = (bits & OMIT_NORMS) != 0;
                bool          storePayloads            = (bits & STORE_PAYLOADS) != 0;
                bool          omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

                AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
            }

            if (input.GetFilePointer() != input.Length())
            {
                throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
            }
        }
        private int ReadDeltaPosition()
        {
            int delta = proxStream.ReadVInt();

            if (currentFieldStoresPayloads)
            {
                // if the current field stores payloads then
                // the position delta is shifted one bit to the left.
                // if the LSB is set, then we have to read the current
                // payload length
                if ((delta & 1) != 0)
                {
                    payloadLength = proxStream.ReadVInt();
                }
                delta             = SupportClass.Number.URShift(delta, 1);
                needToLoadPayload = true;
            }
            return(delta);
        }
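
ReadDeltaPosition above undoes the same flag-in-the-low-bit packing, this time for payload lengths. A hedged sketch of the write side (the helper and the writeVInt delegate are illustrative, not Lucene API): when the field stores payloads, the position delta is shifted left and the low bit marks that the payload length changed and is written next.

        using System;

        static class PositionDeltaSketch
        {
            // Hypothetical writer-side counterpart of ReadDeltaPosition.
            public static void WritePositionDelta(Action<int> writeVInt, int delta,
                bool storesPayloads, bool payloadLengthChanged, int payloadLength)
            {
                if (storesPayloads)
                {
                    writeVInt((delta << 1) | (payloadLengthChanged ? 1 : 0));
                    if (payloadLengthChanged)
                    {
                        writeVInt(payloadLength); // only written when the length differs
                    }
                }
                else
                {
                    writeVInt(delta); // no payloads: the delta is stored as-is
                }
            }
        }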
Example #11
        /// <summary>Increments the enumeration to the next element.  True if one exists.</summary>
        public override bool Next()
        {
            if (position++ >= size - 1)
            {
                prevBuffer.Set(termBuffer);
                termBuffer.Reset();
                return(false);
            }

            prevBuffer.Set(termBuffer);
            termBuffer.Read(input, fieldInfos);

            termInfo.docFreq      = input.ReadVInt();        // read doc freq
            termInfo.freqPointer += input.ReadVLong();       // read freq pointer
            termInfo.proxPointer += input.ReadVLong();       // read prox pointer

            if (format == -1)
            {
                // just read skipOffset in order to increment the file pointer;
                // value is never used since skipTo is switched off
                if (!isIndex)
                {
                    if (termInfo.docFreq > formatM1SkipInterval)
                    {
                        termInfo.skipOffset = input.ReadVInt();
                    }
                }
            }
            else
            {
                if (termInfo.docFreq >= skipInterval)
                {
                    termInfo.skipOffset = input.ReadVInt();
                }
            }

            if (isIndex)
            {
                indexPointer += input.ReadVLong();                 // read index pointer
            }
            return(true);
        }
Example #12
        public CompoundFileReader(Directory dir, string name, int readBufferSize)
        {
            directory           = dir;
            fileName            = name;
            this.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                stream = dir.OpenInput(name, readBufferSize);

                // read the directory and init files
                int       count = stream.ReadVInt();
                FileEntry entry = null;
                for (int i = 0; i < count; i++)
                {
                    long   offset = stream.ReadLong();
                    string id     = stream.ReadString();

                    if (entry != null)
                    {
                        // set length of the previous entry
                        entry.length = offset - entry.offset;
                    }

                    entry = new FileEntry {
                        offset = offset
                    };
                    entries[id] = entry;
                }

                // set the length of the final entry
                if (entry != null)
                {
                    entry.length = stream.Length() - entry.offset;
                }

                success = true;
            }
            finally
            {
                if (!success && (stream != null))
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
Example #13
		public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
		{
			directory = dir;
			fileName = name;
			this.readBufferSize = readBufferSize;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenInput(name, readBufferSize);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}
					
					entry = new FileEntry();
					entry.offset = offset;
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException)
					{
					}
				}
			}
		}
Example #14
        public virtual bool Next()
        {
            while (true)
            {
                if (count == df)
                {
                    return(false);
                }
                int docCode = freqStream.ReadVInt();

                if (currentFieldOmitTermFreqAndPositions)
                {
                    doc += docCode;
                    freq = 1;
                }
                else
                {
                    doc += Number.URShift(docCode, 1); // shift off low bit
                    if ((docCode & 1) != 0)
                    {
                        // if low bit is set
                        freq = 1;
                    }
                    // freq is one
                    else
                    {
                        freq = freqStream.ReadVInt(); // else read freq
                    }
                }

                count++;

                if (deletedDocs == null || !deletedDocs.Get(doc))
                {
                    break;
                }
                SkippingDoc();
            }
            return(true);
        }
Example #15
        /// <summary>read as a d-gaps list </summary>
        private void  ReadDgaps(IndexInput input)
        {
            size  = input.ReadInt();            // (re)read size
            count = input.ReadInt();            // read count
            bits  = new byte[(size >> 3) + 1];  // allocate bits
            int last = 0;
            int n    = Count();

            while (n > 0)
            {
                last      += input.ReadVInt();
                bits[last] = input.ReadByte();
                n         -= BYTE_COUNTS[bits[last] & 0xFF];
            }
        }
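
ReadDgaps reconstructs a sparse bit vector from (gap, byte) pairs. A hedged sketch of the layout it expects, written from the producing side (the delegates are illustrative stand-ins for the output stream): size and count as plain ints, then for every non-zero byte of the vector a VInt gap from the previous non-zero byte index followed by that byte.

        using System;

        static class DgapsSketch
        {
            // Hypothetical writer-side counterpart of ReadDgaps above.
            public static void WriteDgaps(Action<int> writeInt, Action<int> writeVInt,
                Action<byte> writeByte, byte[] bits, int size, int count)
            {
                writeInt(size);   // total number of bits
                writeInt(count);  // number of set bits
                int last = 0;
                for (int i = 0; i < bits.Length; i++)
                {
                    if (bits[i] != 0)
                    {
                        writeVInt(i - last); // gap from the previous non-zero byte
                        writeByte(bits[i]);
                        last = i;
                    }
                }
            }
        }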
Example #16
        private void  Read(IndexInput input)
        {
            int size = input.ReadVInt();             //read in the size

            for (int i = 0; i < size; i++)
            {
                System.String name            = String.Intern(input.ReadString());
                byte          bits            = input.ReadByte();
                bool          isIndexed       = (bits & IS_INDEXED) != 0;
                bool          storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                bool          storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                bool          storeOffsetWithTermVector    = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                bool          omitNorms = (bits & OMIT_NORMS) != 0;

                AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
            }
        }
        internal static NumericEntry ReadNumericEntry(IndexInput meta)
        {
            NumericEntry entry = new NumericEntry();
            entry.Format = meta.ReadVInt();
            entry.MissingOffset = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.Offset = meta.ReadLong();
            entry.Count = meta.ReadVLong();
            entry.BlockSize = meta.ReadVInt();
            switch (entry.Format)
            {
                case Lucene45DocValuesConsumer.GCD_COMPRESSED:
                    entry.MinValue = meta.ReadLong();
                    entry.Gcd = meta.ReadLong();
                    break;

                case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
                    if (entry.Count > int.MaxValue)
                    {
                        throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
                    }
                    int uniqueValues = meta.ReadVInt();
                    if (uniqueValues > 256)
                    {
                        throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
                    }
                    entry.Table = new long[uniqueValues];
                    for (int i = 0; i < uniqueValues; ++i)
                    {
                        entry.Table[i] = meta.ReadLong();
                    }
                    break;

                case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
                    break;

                default:
                    throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }
            return entry;
        }
        private void Read(IndexInput input)
        {
            int size = input.ReadVInt(); //read in the size
            for (int i = 0; i < size; i++)
            {
                System.String name = String.Intern(input.ReadString());
                byte bits = input.ReadByte();
                bool isIndexed = (bits & IS_INDEXED) != 0;
                bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                bool omitNorms = (bits & OMIT_NORMS) != 0;

                AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
            }
        }
Example #19
        /// <summary>Optimized implementation. </summary>
        public virtual bool SkipTo(int target)
        {
            if (df >= skipInterval)
            {
                // optimized case

                if (skipStream == null)
                {
                    skipStream = (IndexInput)freqStream.Clone();                      // lazily clone
                }
                if (!haveSkipped)
                {
                    // lazily seek skip stream
                    skipStream.Seek(skipPointer);
                    haveSkipped = true;
                }

                // scan skip data
                int  lastSkipDoc     = skipDoc;
                long lastFreqPointer = freqStream.GetFilePointer();
                long lastProxPointer = -1;
                int  numSkipped      = -1 - (count % skipInterval);

                while (target > skipDoc)
                {
                    lastSkipDoc     = skipDoc;
                    lastFreqPointer = freqPointer;
                    lastProxPointer = proxPointer;

                    if (skipDoc != 0 && skipDoc >= doc)
                    {
                        numSkipped += skipInterval;
                    }

                    if (skipCount >= numSkips)
                    {
                        break;
                    }

                    skipDoc     += skipStream.ReadVInt();
                    freqPointer += skipStream.ReadVInt();
                    proxPointer += skipStream.ReadVInt();

                    skipCount++;
                }

                // if we found something to skip, then skip it
                if (lastFreqPointer > freqStream.GetFilePointer())
                {
                    freqStream.Seek(lastFreqPointer);
                    SkipProx(lastProxPointer);

                    doc    = lastSkipDoc;
                    count += numSkipped;
                }
            }

            // done skipping, now just scan
            do
            {
                if (!Next())
                {
                    return(false);
                }
            }while (target > doc);
            return(true);
        }
Example #20
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <returns> The TermVector located at that position
        /// </returns>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (tvfFormat == FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            char[] buffer         = new char[10];     // init the buffer with a length of 10 characters
            char[] previousBuffer = new char[] {};

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                if (buffer.Length < totalLength)
                {
                    // increase buffer
                    buffer = null;                     // give a hint to garbage collector
                    buffer = new char[totalLength];

                    if (start > 0)
                    {
                        // just copy if necessary
                        Array.Copy(previousBuffer, 0, buffer, 0, start);
                    }
                }

                tvf.ReadChars(buffer, start, deltaLength);
                System.String term = new System.String(buffer, 0, totalLength);
                previousBuffer = buffer;
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is any way to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
Example #21
		private void  Read(IndexInput input, System.String fileName)
		{
			int firstInt = input.ReadVInt();
			
			if (firstInt < 0)
			{
				// This is a real format
				format = firstInt;
			}
			else
			{
				format = FORMAT_PRE;
			}
			
			if (format != FORMAT_PRE && format != FORMAT_START)
			{
				throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
			}
			
			int size;
			if (format == FORMAT_PRE)
			{
				size = firstInt;
			}
			else
			{
				size = input.ReadVInt(); //read in the size
			}
			
			for (int i = 0; i < size; i++)
			{
				System.String name = StringHelper.Intern(input.ReadString());
				byte bits = input.ReadByte();
				bool isIndexed = (bits & IS_INDEXED) != 0;
				bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
				bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
				bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
				bool omitNorms = (bits & OMIT_NORMS) != 0;
				bool storePayloads = (bits & STORE_PAYLOADS) != 0;
				bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
				
				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			}
			
			if (input.GetFilePointer() != input.Length())
			{
				throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
			}
		}
        private void ReadSortedField(int fieldNumber, IndexInput meta, FieldInfos infos)
        {
            // sorted = binary + numeric
            if (meta.ReadVInt() != fieldNumber)
            {
                throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
            {
                throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            BinaryEntry b = ReadBinaryEntry(meta);
            Binaries[fieldNumber] = b;

            if (meta.ReadVInt() != fieldNumber)
            {
                throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
            {
                throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            NumericEntry n = ReadNumericEntry(meta);
            Ords[fieldNumber] = n;
        }
Example #23
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            try
            {
                int format = input.ReadVInt();

                if (format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }
                if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW)
                {
                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
                }

                int         size  = input.ReadVInt(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name            = input.ReadString();
                    int    fieldNumber     = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt() : i;
                    byte   bits            = input.ReadByte();
                    bool   isIndexed       = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
                    bool   storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
                    bool   omitNorms       = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
                    bool   storePayloads   = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0;
                    FieldInfo.IndexOptions? indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = null;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0)
                    {
                        if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS)
                        {
                            indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                        }
                        else
                        {
                            throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                        }
                    }
                    else
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                    {
                        storePayloads = false;
                    }

                    DocValuesType_e? normType = isIndexed && !omitNorms ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;
                    if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null)
                    {
                        // RW can have norms but doesn't write them
                        normType = input.ReadByte() != 0 ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null;
                    }

                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
                }

                if (input.FilePointer != input.Length())
                {
                    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
                }
                return(new FieldInfos(infos));
            }
            finally
            {
                input.Dispose();
            }
        }
        private void ReadSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos)
        {
            // sortedset = binary + numeric (addresses) + ordIndex
            if (meta.ReadVInt() != fieldNumber)
            {
                throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
            {
                throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            BinaryEntry b = ReadBinaryEntry(meta);
            Binaries[fieldNumber] = b;

            if (meta.ReadVInt() != fieldNumber)
            {
                throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
            {
                throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            NumericEntry n1 = ReadNumericEntry(meta);
            Ords[fieldNumber] = n1;

            if (meta.ReadVInt() != fieldNumber)
            {
                throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
            {
                throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
            }
            NumericEntry n2 = ReadNumericEntry(meta);
            OrdIndexes[fieldNumber] = n2;
        }
Example #25
 /// <summary>
 /// read as a d-gaps list </summary>
 private void ReadSetDgaps(IndexInput input)
 {
     Size_Renamed = input.ReadInt(); // (re)read size
     Count_Renamed = input.ReadInt(); // read count
     Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
     int last = 0;
     int n = Count();
     while (n > 0)
     {
         last += input.ReadVInt();
         Bits[last] = input.ReadByte();
         n -= BitUtil.BitCount(Bits[last]);
         Debug.Assert(n >= 0);
     }
 }
Example #26
 public void Read(IndexInput input, FieldInfos fieldInfos)
 {
     this.Term = null; // invalidate cache
     NewSuffixStart = input.ReadVInt();
     int length = input.ReadVInt();
     int totalLength = NewSuffixStart + length;
     Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
     if (Bytes.Bytes.Length < totalLength)
     {
         Bytes.Grow(totalLength);
     }
     Bytes.Length = totalLength;
     input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);
     int fieldNumber = input.ReadVInt();
     if (fieldNumber != CurrentFieldNumber)
     {
         CurrentFieldNumber = fieldNumber;
         // NOTE: too much sneakiness here, seriously this is a negative vint?!
         if (CurrentFieldNumber == -1)
         {
             Field = "";
         }
         else
         {
             Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
             Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
         }
     }
     else
     {
          Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name), "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " + (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
     }
 }
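
The Read method above (prefix length, suffix length, suffix bytes, field number) is the reader half of Lucene's front-coded term dictionary. A hedged sketch of the matching write side; the delegates and the WriteTerm helper are illustrative, not Lucene API.

        using System;

        static class TermWriteSketch
        {
            // Hypothetical writer-side counterpart of TermBuffer.Read: only the bytes
            // that differ from the previous term are written, prefixed by how much of
            // the previous term is shared.
            public static void WriteTerm(Action<int> writeVInt,
                Action<byte[], int, int> writeBytes,
                byte[] prev, int prevLen, byte[] term, int termLen, int fieldNumber)
            {
                int prefix = 0;
                int max = Math.Min(prevLen, termLen);
                while (prefix < max && prev[prefix] == term[prefix])
                {
                    prefix++;
                }
                writeVInt(prefix);                          // NewSuffixStart on the read side
                writeVInt(termLen - prefix);                // suffix length
                writeBytes(term, prefix, termLen - prefix); // the new suffix bytes
                writeVInt(fieldNumber);
            }
        }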
Example #27
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <param name="mapper">The mapper used to map the TermVector
        /// </param>
        /// <throws>  IOException </throws>
        private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If no terms - return a constant empty termvector. However, this should never occur!
            if (numTerms == 0)
            {
                return;
            }

            bool storePositions;
            bool storeOffsets;

            if (format >= FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }
            mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            byte[] byteBuffer;
            char[] charBuffer;
            bool   preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

            // init the buffers
            if (preUTF8)
            {
                charBuffer = new char[10];
                byteBuffer = null;
            }
            else
            {
                charBuffer = null;
                byteBuffer = new byte[20];
            }

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;

                System.String term;

                if (preUTF8)
                {
                    // Term stored as java chars
                    if (charBuffer.Length < totalLength)
                    {
                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                        charBuffer = newCharBuffer;
                    }
                    tvf.ReadChars(charBuffer, start, deltaLength);
                    term = new System.String(charBuffer, 0, totalLength);
                }
                else
                {
                    // Term stored as utf8 bytes
                    if (byteBuffer.Length < totalLength)
                    {
                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                        byteBuffer = newByteBuffer;
                    }
                    tvf.ReadBytes(byteBuffer, start, deltaLength);
                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
                }
                int   freq      = tvf.ReadVInt();
                int[] positions = null;
                if (storePositions)
                {
                    //read in the positions
                    //does the mapper even care about positions?
                    if (mapper.IsIgnoringPositions() == false)
                    {
                        positions = new int[freq];
                        int prevPosition = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            positions[j] = prevPosition + tvf.ReadVInt();
                            prevPosition = positions[j];
                        }
                    }
                    else
                    {
                        //we need to skip over the positions.  Since these are VInts, I don't believe there is any way to know for sure how far to skip
                        //
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                        }
                    }
                }
                TermVectorOffsetInfo[] offsets = null;
                if (storeOffsets)
                {
                    //does the mapper even care about offsets?
                    if (mapper.IsIgnoringOffsets() == false)
                    {
                        offsets = new TermVectorOffsetInfo[freq];
                        int prevOffset = 0;
                        for (int j = 0; j < freq; j++)
                        {
                            int startOffset = prevOffset + tvf.ReadVInt();
                            int endOffset   = startOffset + tvf.ReadVInt();
                            offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                            prevOffset = endOffset;
                        }
                    }
                    else
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            tvf.ReadVInt();
                            tvf.ReadVInt();
                        }
                    }
                }
                mapper.Map(term, freq, offsets, positions);
            }
        }
Example #28
        public virtual void  Get(int docNum, System.String field, TermVectorMapper mapper)
        {
            if (tvx != null)
            {
                int fieldNumber = fieldInfos.FieldNumber(field);
                //We need to account for the FORMAT_SIZE when seeking in the tvx
                //We don't need to do this in other seeks because we already have the
                // file pointer
                //that was written in another file
                SeekTvx(docNum);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long tvdPosition = tvx.ReadLong();

                tvd.Seek(tvdPosition);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather than requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found  = -1;
                for (int i = 0; i < fieldCount; i++)
                {
                    if (format >= FORMAT_VERSION)
                    {
                        number = tvd.ReadVInt();
                    }
                    else
                    {
                        number += tvd.ReadVInt();
                    }

                    if (number == fieldNumber)
                    {
                        found = i;
                    }
                }

                // This field, although valid in the segment, was not found in this
                // document
                if (found != -1)
                {
                    // Compute position in the tvf file
                    long position;
                    if (format >= FORMAT_VERSION2)
                    {
                        position = tvx.ReadLong();
                    }
                    else
                    {
                        position = tvd.ReadVLong();
                    }
                    for (int i = 1; i <= found; i++)
                    {
                        position += tvd.ReadVLong();
                    }

                    mapper.SetDocumentNumber(docNum);
                    ReadTermVector(field, position, mapper);
                }
                else
                {
                    //System.out.println("Fieldable not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
        }
        internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Format = meta.ReadVInt();
            entry.MissingOffset = meta.ReadLong();
            entry.MinLength = meta.ReadVInt();
            entry.MaxLength = meta.ReadVInt();
            entry.Count = meta.ReadVLong();
            entry.Offset = meta.ReadLong();
            switch (entry.Format)
            {
                case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
                    break;

                case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
                    entry.AddressInterval = meta.ReadVInt();
                    entry.AddressesOffset = meta.ReadLong();
                    entry.PackedIntsVersion = meta.ReadVInt();
                    entry.BlockSize = meta.ReadVInt();
                    break;

                case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
                    entry.AddressesOffset = meta.ReadLong();
                    entry.PackedIntsVersion = meta.ReadVInt();
                    entry.BlockSize = meta.ReadVInt();
                    break;

                default:
                    throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }
            return entry;
        }
Example #30
        public /*internal*/ Document Doc(int n)
        {
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();

            fieldsStream.Seek(position);

            Document doc       = new Document();
            int      numFields = fieldsStream.ReadVInt();

            for (int i = 0; i < numFields; i++)
            {
                int       fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi          = fieldInfos.FieldInfo(fieldNumber);

                byte bits = fieldsStream.ReadByte();

                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

                if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    if (compressed)
                    {
                        doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                    }
                    else
                    {
                        doc.Add(new Field(fi.name, b, Field.Store.YES));
                    }
                }
                else
                {
                    Field.Index index;
                    Field.Store store = Field.Store.YES;

                    if (fi.isIndexed && tokenize)
                    {
                        index = Field.Index.TOKENIZED;
                    }
                    else if (fi.isIndexed && !tokenize)
                    {
                        index = Field.Index.UN_TOKENIZED;
                    }
                    else
                    {
                        index = Field.Index.NO;
                    }

                    Field.TermVector termVector = null;
                    if (fi.storeTermVector)
                    {
                        if (fi.storeOffsetWithTermVector)
                        {
                            if (fi.storePositionWithTermVector)
                            {
                                termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                            }
                            else
                            {
                                termVector = Field.TermVector.WITH_OFFSETS;
                            }
                        }
                        else if (fi.storePositionWithTermVector)
                        {
                            termVector = Field.TermVector.WITH_POSITIONS;
                        }
                        else
                        {
                            termVector = Field.TermVector.YES;
                        }
                    }
                    else
                    {
                        termVector = Field.TermVector.NO;
                    }

                    if (compressed)
                    {
                        store = Field.Store.COMPRESS;
                        byte[] b = new byte[fieldsStream.ReadVInt()];
                        fieldsStream.ReadBytes(b, 0, b.Length);
                        Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                    else
                    {
                        Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                        f.SetOmitNorms(fi.omitNorms);
                        doc.Add(f);
                    }
                }
            }

            return(doc);
        }
Example #31
 /// <summary>read as a d-gaps list </summary>
 private void ReadDgaps(IndexInput input)
 {
     size = input.ReadInt(); // (re)read size
     count = input.ReadInt(); // read count
     bits = new byte[(size >> 3) + 1]; // allocate bits
     int last = 0;
     int n = Count();
     while (n > 0)
     {
         last += input.ReadVInt();
         bits[last] = input.ReadByte();
         n -= BYTE_COUNTS[bits[last] & 0xFF];
     }
 }
 private void ReadFields(IndexInput meta, FieldInfos infos)
 {
     int fieldNumber = meta.ReadVInt();
     while (fieldNumber != -1)
     {
         // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
         // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
         if (fieldNumber < 0)
         {
             // trickier to validate more: because we re-use for norms, because we use multiple entries
             // for "composite" types like sortedset, etc.
             throw new Exception("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
         }
         byte type = meta.ReadByte();
         if (type == Lucene45DocValuesFormat.NUMERIC)
         {
             Numerics[fieldNumber] = ReadNumericEntry(meta);
         }
         else if (type == Lucene45DocValuesFormat.BINARY)
         {
             BinaryEntry b = ReadBinaryEntry(meta);
             Binaries[fieldNumber] = b;
         }
         else if (type == Lucene45DocValuesFormat.SORTED)
         {
             ReadSortedField(fieldNumber, meta, infos);
         }
         else if (type == Lucene45DocValuesFormat.SORTED_SET)
         {
             SortedSetEntry ss = ReadSortedSetEntry(meta);
             SortedSets[fieldNumber] = ss;
             if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
             {
                 ReadSortedSetFieldWithAddresses(fieldNumber, meta, infos);
             }
             else if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
             {
                 if (meta.ReadVInt() != fieldNumber)
                 {
                     throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                 }
                 if (meta.ReadByte() != Lucene45DocValuesFormat.SORTED)
                 {
                     throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                 }
                 ReadSortedField(fieldNumber, meta, infos);
             }
             else
             {
                 throw new Exception();
             }
         }
         else
         {
             throw new Exception("invalid type: " + type + ", resource=" + meta);
         }
         fieldNumber = meta.ReadVInt();
     }
 }
Example #33
 /// <summary>
 /// Construct a direct <seealso cref="Reader"/> from an <seealso cref="IndexInput"/>. this method
 /// is useful to restore data from streams which have been created using
 /// <seealso cref="PackedInts#getWriter(DataOutput, int, int, float)"/>.
 /// </p><p>
 /// The returned reader will have very little memory overhead, but every call
 /// to <seealso cref="Reader#get(int)"/> is likely to perform a disk seek.
 /// </summary>
 /// <param name="in">           the stream to read data from </param>
 /// <returns> a direct Reader </returns>
 /// <exception cref="IOException"> If there is a low-level I/O error
 /// @lucene.internal </exception>
 public static Reader GetDirectReader(IndexInput @in)
 {
     int version = CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
     int bitsPerValue = @in.ReadVInt();
     Debug.Assert(bitsPerValue > 0 && bitsPerValue <= 64, "bitsPerValue=" + bitsPerValue);
     int valueCount = @in.ReadVInt();
     Format format = Format.ById(@in.ReadVInt());
     return GetDirectReaderNoHeader(@in, format, version, valueCount, bitsPerValue);
 }
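
A possible usage sketch for the direct reader, under the assumption that GetDirectReader is exposed via PackedInts as in Lucene proper; the directory, file name, and IOContext are placeholders, not part of this example's source.

     // Assumed usage only: "values.pck" and IOContext.DEFAULT are placeholders.
     static long ReadOneValue(Directory dir)
     {
         using (IndexInput input = dir.OpenInput("values.pck", IOContext.DEFAULT))
         {
             PackedInts.Reader reader = PackedInts.GetDirectReader(input);
             return reader.Get(42); // each Get() may trigger a disk seek
         }
     }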
Example #34
        /// <summary> Retrieve the term vector for the given document and field</summary>
        /// <param name="docNum">The document number to retrieve the vector for
        /// </param>
        /// <param name="field">The field within the document to retrieve
        /// </param>
        /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
        /// </returns>
        /// <throws>  IOException if there is an error reading the term vector files </throws>
        public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
        {
            // Check if no term vectors are available for this segment at all
            int            fieldNumber = fieldInfos.FieldNumber(field);
            TermFreqVector result      = null;

            if (tvx != null)
            {
                //We need to account for the FORMAT_SIZE when seeking in the tvx
                //We don't need to do this in other seeks because we already have the
                // file pointer
                //that was written in another file
                tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long position = tvx.ReadLong();

                tvd.Seek(position);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather than requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found  = -1;
                for (int i = 0; i < fieldCount; i++)
                {
                    if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
                    {
                        number = tvd.ReadVInt();
                    }
                    else
                    {
                        number += tvd.ReadVInt();
                    }

                    if (number == fieldNumber)
                    {
                        found = i;
                    }
                }

                // The field may be valid in the segment but still absent from this
                // particular document; only proceed if it was actually found here.
                if (found != -1)
                {
                    // Compute position in the tvf file
                    position = 0;
                    for (int i = 0; i <= found; i++)
                    {
                        position += tvd.ReadVLong();
                    }

                    result = ReadTermVector(field, position);
                }
                else
                {
                    //System.out.println("Field not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
            return(result);
        }
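Application code normally reaches this method through the index reader rather than instantiating the term-vectors reader directly. A rough usage sketch, assuming the classic 2.x/3.x-era Lucene.Net API surface (none of these member names are taken from this page):

    // Hypothetical usage sketch; type and member names are assumptions about the old API.
    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/index")), true);
    try
    {
        TermFreqVector vector = reader.GetTermFreqVector(0, "body");   // doc 0, field "body"
        if (vector != null)
        {
            System.String[] terms = vector.GetTerms();
            int[] freqs = vector.GetTermFrequencies();
        }
    }
    finally
    {
        reader.Close();
    }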
Example #35
        protected internal override int ReadSkipData(int level, IndexInput skipStream)
        {
            int delta;
            if (currentFieldStoresPayloads)
            {
                // the current field stores payloads.
                // if the doc delta is odd then we have
                // to read the current payload length
                // because it differs from the length of the
                // previous payload
                delta = skipStream.ReadVInt();
                if ((delta & 1) != 0)
                {
                    payloadLength[level] = skipStream.ReadVInt();
                }
                delta = SupportClass.Number.URShift(delta, 1);
            }
            else
            {
                delta = skipStream.ReadVInt();
            }
            freqPointer[level] += skipStream.ReadVInt();
            proxPointer[level] += skipStream.ReadVInt();

            return delta;
        }
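The (delta & 1) test above only works because of a matching convention on the write side: when the field stores payloads, the doc delta is shifted left one bit and the low bit flags whether a new payload length follows. A minimal sketch of that encoding (the method itself is an illustration, not code from this page):

    // Writer-side convention that the skip reader above decodes.
    static void WriteSkipEntry(IndexOutput skipStream, int docDelta, bool payloadLengthChanged, int payloadLength)
    {
        if (payloadLengthChanged)
        {
            skipStream.WriteVInt((docDelta << 1) | 1);   // low bit set: new payload length follows
            skipStream.WriteVInt(payloadLength);
        }
        else
        {
            skipStream.WriteVInt(docDelta << 1);         // low bit clear: previous length still applies
        }
    }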
Example #36
        /// <summary> </summary>
        /// <param name="field">The field to read in
        /// </param>
        /// <param name="tvfPointer">The pointer within the tvf file where we should start reading
        /// </param>
        /// <returns> The TermVector located at that position
        /// </returns>
        /// <throws>  IOException </throws>
        private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
        {
            // Now read the data from specified position
            //We don't need to offset by the FORMAT here since the pointer already includes the offset
            tvf.Seek(tvfPointer);

            int numTerms = tvf.ReadVInt();

            //System.out.println("Num Terms: " + numTerms);
            // If there are no terms, return a constant empty term vector. However, this should never occur!
            if (numTerms == 0)
            {
                return(new SegmentTermVector(field, null, null));
            }

            bool storePositions;
            bool storeOffsets;

            if (tvfFormat == TermVectorsWriter.FORMAT_VERSION)
            {
                byte bits = tvf.ReadByte();
                storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
            }
            else
            {
                tvf.ReadVInt();
                storePositions = false;
                storeOffsets   = false;
            }

            System.String[] terms     = new System.String[numTerms];
            int[]           termFreqs = new int[numTerms];

            //  we may not need these, but declare them
            int[][] positions = null;
            TermVectorOffsetInfo[][] offsets = null;
            if (storePositions)
            {
                positions = new int[numTerms][];
            }
            if (storeOffsets)
            {
                offsets = new TermVectorOffsetInfo[numTerms][];
            }

            int start       = 0;
            int deltaLength = 0;
            int totalLength = 0;

            char[] buffer         = new char[10];     // init the buffer with a length of 10 characters
            char[] previousBuffer = new char[] {};

            for (int i = 0; i < numTerms; i++)
            {
                start       = tvf.ReadVInt();
                deltaLength = tvf.ReadVInt();
                totalLength = start + deltaLength;
                if (buffer.Length < totalLength)
                {
                    // increase buffer
                    buffer = null;                     // give a hint to garbage collector
                    buffer = new char[totalLength];

                    if (start > 0)
                    {
                        // just copy if necessary
                        Array.Copy(previousBuffer, 0, buffer, 0, start);
                    }
                }

                tvf.ReadChars(buffer, start, deltaLength);
                terms[i]       = new System.String(buffer, 0, totalLength);
                previousBuffer = buffer;
                int freq = tvf.ReadVInt();
                termFreqs[i] = freq;

                if (storePositions)
                {
                    //read in the positions
                    int[] pos = new int[freq];
                    positions[i] = pos;
                    int prevPosition = 0;
                    for (int j = 0; j < freq; j++)
                    {
                        pos[j]       = prevPosition + tvf.ReadVInt();
                        prevPosition = pos[j];
                    }
                }

                if (storeOffsets)
                {
                    TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
                    offsets[i] = offs;
                    int prevOffset = 0;
                    for (int j = 0; j < freq; j++)
                    {
                        int startOffset = prevOffset + tvf.ReadVInt();
                        int endOffset   = startOffset + tvf.ReadVInt();
                        offs[j]    = new TermVectorOffsetInfo(startOffset, endOffset);
                        prevOffset = endOffset;
                    }
                }
            }

            SegmentTermVector tv;

            if (storePositions || storeOffsets)
            {
                tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
            }
            else
            {
                tv = new SegmentTermVector(field, terms, termFreqs);
            }
            return(tv);
        }
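The term loop above decodes front-coded strings: each entry stores how many leading characters to reuse from the previous term (start) and only the new suffix (deltaLength). A stand-alone illustration of the same scheme:

    // Stand-alone illustration of the front coding used for the term dictionary above.
    static string[] DecodeFrontCodedTerms((int prefixLength, string suffix)[] entries)
    {
        string[] terms = new string[entries.Length];
        string previous = "";
        for (int i = 0; i < entries.Length; i++)
        {
            // reuse prefixLength characters of the previous term, then append the stored suffix
            previous = previous.Substring(0, entries[i].prefixLength) + entries[i].suffix;
            terms[i] = previous;
        }
        return terms;
    }

For example, the entries (0, "apple"), (3, "ly"), (4, "es") decode to "apple", "apply", "apples".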
Example #37
        protected internal override int ReadSkipData(int level, IndexInput skipStream)
        {
            // if (DEBUG) {
            //   System.out.println("readSkipData level=" + level);
            // }
            int delta = skipStream.ReadVInt();
            // if (DEBUG) {
            //   System.out.println("  delta=" + delta);
            // }
            DocPointer_Renamed[level] += skipStream.ReadVInt();
            // if (DEBUG) {
            //   System.out.println("  docFP=" + docPointer[level]);
            // }

            if (PosPointer_Renamed != null)
            {
                PosPointer_Renamed[level] += skipStream.ReadVInt();
                // if (DEBUG) {
                //   System.out.println("  posFP=" + posPointer[level]);
                // }
                PosBufferUpto_Renamed[level] = skipStream.ReadVInt();
                // if (DEBUG) {
                //   System.out.println("  posBufferUpto=" + posBufferUpto[level]);
                // }

                if (PayloadByteUpto_Renamed != null)
                {
                    PayloadByteUpto_Renamed[level] = skipStream.ReadVInt();
                }

                if (PayPointer_Renamed != null)
                {
                    PayPointer_Renamed[level] += skipStream.ReadVInt();
                }
            }
            return delta;
        }
        public int NextPosition()
        {
            proxCount--;
            return position += proxStream.ReadVInt();
        }
        protected internal override int ReadSkipData(int level, IndexInput skipStream)
        {
            int delta;
            if (CurrentFieldStoresPayloads || CurrentFieldStoresOffsets)
            {
                // the current field stores payloads and/or offsets.
                // if the doc delta is odd then we have
                // to read the current payload/offset lengths
                // because it differs from the lengths of the
                // previous payload/offset
                delta = skipStream.ReadVInt();
                if ((delta & 1) != 0)
                {
                    if (CurrentFieldStoresPayloads)
                    {
                        PayloadLength_Renamed[level] = skipStream.ReadVInt();
                    }
                    if (CurrentFieldStoresOffsets)
                    {
                        OffsetLength_Renamed[level] = skipStream.ReadVInt();
                    }
                }
                delta = (int)((uint)delta >> 1);
            }
            else
            {
                delta = skipStream.ReadVInt();
            }

            FreqPointer_Renamed[level] += skipStream.ReadVInt();
            ProxPointer_Renamed[level] += skipStream.ReadVInt();

            return delta;
        }
 internal virtual SortedSetEntry ReadSortedSetEntry(IndexInput meta)
 {
     SortedSetEntry entry = new SortedSetEntry();
     if (Version >= Lucene45DocValuesFormat.VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED)
     {
         entry.Format = meta.ReadVInt();
     }
     else
     {
         entry.Format = Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES;
     }
     if (entry.Format != Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED && entry.Format != Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
     {
         throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
     }
     return entry;
 }
Example #41
 /// <summary>
 /// read as a d-gaps cleared bits list </summary>
 private void ReadClearedDgaps(IndexInput input)
 {
     Size_Renamed = input.ReadInt(); // (re)read size
     Count_Renamed = input.ReadInt(); // read count
     Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
     for (int i = 0; i < Bits.Length; ++i)
     {
         Bits[i] = 0xff;
     }
     ClearUnusedBits();
     int last = 0;
     int numCleared = Size() - Count();
     while (numCleared > 0)
     {
         last += input.ReadVInt();
         Bits[last] = input.ReadByte();
         numCleared -= 8 - BitUtil.BitCount(Bits[last]);
         Debug.Assert(numCleared >= 0 || (last == (Bits.Length - 1) && numCleared == -(8 - (Size_Renamed & 7))));
     }
 }
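The loop above reads a d-gap list: only bytes that contain cleared bits are stored, each addressed by its distance (gap) from the previously stored byte, while every unlisted byte stays at 0xFF. A tiny worked sketch with made-up data:

    // Assumed sample data, purely to show the gap arithmetic.
    byte[] bits = new byte[8];
    for (int i = 0; i < bits.Length; i++) bits[i] = 0xFF;        // start from "all bits set"
    (int gap, byte value)[] dgaps = { (3, (byte)0xFE), (2, (byte)0x7F) };
    int last = 0;
    foreach (var (gap, value) in dgaps)
    {
        last += gap;          // 0 + 3 = 3, then 3 + 2 = 5
        bits[last] = value;   // only these two bytes differ from all-ones
    }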
        internal readonly PackedInts.Reader[] StartPointersDeltas; // delta from the avg

        #endregion Fields

        #region Constructors

        // It is the responsibility of the caller to close fieldsIndexIn after this constructor
        // has been called
        internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
        {
            MaxDoc = si.DocCount;
            int[] docBases = new int[16];
            long[] startPointers = new long[16];
            int[] avgChunkDocs = new int[16];
            long[] avgChunkSizes = new long[16];
            PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
            PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

            int packedIntsVersion = fieldsIndexIn.ReadVInt();

            int blockCount = 0;

            for (; ; )
            {
                int numChunks = fieldsIndexIn.ReadVInt();
                if (numChunks == 0)
                {
                    break;
                }
                if (blockCount == docBases.Length)
                {
                    int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
                    docBases = Arrays.CopyOf(docBases, newSize);
                    startPointers = Arrays.CopyOf(startPointers, newSize);
                    avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize);
                    avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize);
                    docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize);
                    startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
                }

                // doc bases
                docBases[blockCount] = fieldsIndexIn.ReadVInt();
                avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt();
                int bitsPerDocBase = fieldsIndexIn.ReadVInt();
                if (bitsPerDocBase > 32)
                {
                    throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
                }
                docBasesDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

                // start pointers
                startPointers[blockCount] = fieldsIndexIn.ReadVLong();
                avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong();
                int bitsPerStartPointer = fieldsIndexIn.ReadVInt();
                if (bitsPerStartPointer > 64)
                {
                    throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
                }
                startPointersDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

                ++blockCount;
            }

            this.DocBases = Arrays.CopyOf(docBases, blockCount);
            this.StartPointers = Arrays.CopyOf(startPointers, blockCount);
            this.AvgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount);
            this.AvgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount);
            this.DocBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount);
            this.StartPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
        }
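Once loaded, each block lets the reader reconstruct a chunk's start pointer from the block-level average plus a small packed correction. A simplified sketch of that arithmetic, using the fields assigned above (the real reader additionally zig-zag decodes the stored delta, which this sketch omits):

    // Simplified sketch; zig-zag decoding of the stored correction is omitted.
    long StartPointerOf(int block, int relativeChunk)
    {
        long estimate = StartPointers[block] + AvgChunkSizes[block] * relativeChunk;
        return estimate + StartPointersDeltas[block].Get(relativeChunk);   // packed correction
    }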