/// <summary>
/// Reads metadata entries from <paramref name="meta"/> until a field number of -1
/// is read, storing each entry in the map that matches its type byte.
/// </summary>
/// <param name="meta">Input positioned at the first field number.</param>
/// <exception cref="CorruptIndexException">The type byte is not one of the four known entry types.</exception>
private void ReadFields(IndexInput meta)
{
    // The iterator clause reads the next field number after each entry.
    for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
    {
        int fieldType = meta.ReadByte();

        if (fieldType == NUMBER)
        {
            numerics[fieldNumber] = ReadNumericEntry(meta);
            continue;
        }

        if (fieldType == BYTES)
        {
            binaries[fieldNumber] = ReadBinaryEntry(meta);
            continue;
        }

        if (fieldType == SORTED)
        {
            sorteds[fieldNumber] = ReadSortedEntry(meta);
            continue;
        }

        if (fieldType == SORTED_SET)
        {
            sortedSets[fieldNumber] = ReadSortedSetEntry(meta);
            continue;
        }

        throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
    }
}
/// <summary>
/// Reads the next term from <paramref name="input"/> into this buffer: a start
/// offset and a length, then <c>length</c> chars (pre-UTF8 mode) or bytes written
/// at <c>start</c>, followed by a field number that is resolved to a field name.
/// </summary>
/// <param name="input">Stream to read the term from.</param>
/// <param name="fieldInfos">Used to map the trailing field number to its name.</param>
public void Read(IndexInput input, FieldInfos fieldInfos)
{
    // The cached term no longer matches the buffer contents.
    this.term = null;

    int prefixLength = input.ReadVInt();
    int suffixLength = input.ReadVInt();
    int newLength = prefixLength + suffixLength;

    if (preUTF8Strings)
    {
        // Chars are read straight into the UTF-16 buffer.
        text.SetLength(newLength);
        input.ReadChars(text.result, prefixLength, suffixLength);
    }
    else if (dirty)
    {
        // bytes is dirty: rebuild it from the whole text, append the new bytes,
        // then convert the complete byte range back to UTF-16.
        UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
        bytes.SetLength(newLength);
        input.ReadBytes(bytes.result, prefixLength, suffixLength);
        UnicodeUtil.UTF8toUTF16(bytes.result, 0, newLength, text);
        dirty = false;
    }
    else
    {
        // Only the newly read UTF-8 bytes are converted.
        bytes.SetLength(newLength);
        input.ReadBytes(bytes.result, prefixLength, suffixLength);
        UnicodeUtil.UTF8toUTF16(bytes.result, prefixLength, suffixLength, text);
    }

    int fieldNumber = input.ReadVInt();
    this.field = fieldInfos.FieldName(fieldNumber);
}
/// <summary>
/// Seeks to document <paramref name="n"/> and walks its stored fields, asking
/// <paramref name="visitor"/> for each one whether to read it, skip it, or stop.
/// </summary>
/// <param name="n">Document number passed to <c>SeekIndex</c>.</param>
/// <param name="visitor">Decides per field and receives the fields that are read.</param>
public override void VisitDocument(int n, StoredFieldVisitor visitor)
{
    SeekIndex(n);
    FieldsStream.Seek(IndexStream.ReadLong());

    int fieldCount = FieldsStream.ReadVInt();
    int visited = 0;
    while (visited < fieldCount)
    {
        FieldInfo fieldInfo = FieldInfos.FieldInfo(FieldsStream.ReadVInt());

        int bits = FieldsStream.ReadByte() & 0xFF;
        Debug.Assert(bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY), "bits=" + bits.ToString("x"));

        StoredFieldVisitor.Status decision = visitor.NeedsField(fieldInfo);
        if (decision == StoredFieldVisitor.Status.YES)
        {
            ReadField(visitor, fieldInfo, bits);
        }
        else if (decision == StoredFieldVisitor.Status.NO)
        {
            SkipField(bits);
        }
        else if (decision == StoredFieldVisitor.Status.STOP)
        {
            // The remaining fields of this document are not visited.
            return;
        }

        visited++;
    }
}
/// <summary>
/// Advances the enumeration to the next term.
/// </summary>
/// <returns><c>true</c> if a term was read; <c>false</c> once the last entry has been passed.</returns>
public bool Next()
{
    // Keep the current term as the previous one before advancing.
    PrevBuffer.Set(TermBuffer);

    bool exhausted = Position++ >= Size - 1;
    if (exhausted)
    {
        TermBuffer.Reset();
        return false;
    }

    TermBuffer.Read(Input, FieldInfos);
    NewSuffixStart = TermBuffer.NewSuffixStart;

    TermInfo_Renamed.DocFreq = Input.ReadVInt();      // doc freq
    TermInfo_Renamed.FreqPointer += Input.ReadVLong(); // freq pointer, added to the running value
    TermInfo_Renamed.ProxPointer += Input.ReadVLong(); // prox pointer, added to the running value

    // A skip offset is only read when doc freq reaches the skip interval.
    if (TermInfo_Renamed.DocFreq >= SkipInterval)
    {
        TermInfo_Renamed.SkipOffset = Input.ReadVInt();
    }

    // An index pointer is only read when IsIndex is set.
    if (IsIndex)
    {
        IndexPointer += Input.ReadVLong();
    }

    return true;
}
/// <summary>
/// Reads the next term from <paramref name="input"/>: the offset at which new bytes
/// start, the number of new bytes, the bytes themselves, and a field number.
/// The field name is only re-resolved when the field number changes.
/// </summary>
/// <param name="input">Stream to read the term from.</param>
/// <param name="fieldInfos">Used to resolve the field number to a field name.</param>
public void Read(IndexInput input, FieldInfos fieldInfos)
{
    this.Term = null; // invalidate cache
    NewSuffixStart = input.ReadVInt();
    int length = input.ReadVInt();
    int totalLength = NewSuffixStart + length;
    Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
    if (Bytes.Bytes.Length < totalLength)
    {
        Bytes.Grow(totalLength);
    }
    Bytes.Length = totalLength;
    input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);

    int fieldNumber = input.ReadVInt();
    if (fieldNumber != CurrentFieldNumber)
    {
        CurrentFieldNumber = fieldNumber;
        // NOTE: too much sneakiness here, seriously this is a negative vint?!
        if (CurrentFieldNumber == -1)
        {
            Field = "";
        }
        else
        {
            Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
            Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
        }
    }
    else
    {
        // The conditional must be parenthesised: '+' binds tighter than '==' and '?:',
        // so without the parentheses the whole concatenated prefix was compared to null
        // and the message collapsed to just the field name.
        Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name),
            "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " +
            (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
    }
}
/// <summary>
/// Reads one skip entry for <paramref name="level"/> from <paramref name="skipStream"/>
/// and adds the freq and prox pointer values it contains to that level's pointers.
/// </summary>
/// <param name="level">Index of the per-level arrays to update.</param>
/// <param name="skipStream">Stream positioned at the skip entry.</param>
/// <returns>The doc delta read from the entry.</returns>
protected internal override int ReadSkipData(int level, IndexInput skipStream)
{
    // Both encodings start with the same VInt.
    int delta = skipStream.ReadVInt();

    if (currentFieldStoresPayloads)
    {
        // With payloads, an odd value means a payload length follows because it
        // differs from the previous one; the doc delta is the value shifted right by one.
        bool payloadLengthFollows = (delta & 1) != 0;
        if (payloadLengthFollows)
        {
            payloadLength[level] = skipStream.ReadVInt();
        }
        delta = Number.URShift(delta, 1);
    }

    freqPointer[level] += skipStream.ReadVInt();
    proxPointer[level] += skipStream.ReadVInt();

    return delta;
}
/// <summary>
/// Opens the terms index file for <paramref name="segment"/>, validates its header
/// (and checksum for versions that have one), and reads the per-field directory
/// into <c>_fields</c>.
/// </summary>
/// <param name="dir">Directory the index file is opened from.</param>
/// <param name="fieldInfos">Used to resolve field numbers read from the directory.</param>
/// <param name="segment">Segment name used to build the file name.</param>
/// <param name="indexDivisor">Must be -1 or positive (asserted). When positive, the input is disposed before the constructor returns.</param>
/// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
/// <param name="context">IO context wrapped in a new <c>IOContext(context, true)</c> for the open call.</param>
/// <exception cref="CorruptIndexException">The field count is negative or a field appears twice.</exception>
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
    String segmentSuffix, IOContext context)
{
    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        new IOContext(context, true));
    var success = false;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    try
    {
        _version = ReadHeader(_input);
        _indexDivisor = indexDivisor;

        if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
            CodecUtil.ChecksumEntireFile(_input);

        SeekDir(_input, _dirOffset);

        // Read directory
        var numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
        }

        for (var i = 0; i < numFields; i++)
        {
            var field = _input.ReadVInt();
            var indexStart = _input.ReadVLong();
            var fieldInfo = fieldInfos.FieldInfo(field);

            try
            {
                _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input));
            }
        }
        success = true;
    }
    finally
    {
        if (indexDivisor > 0)
        {
            _input.Dispose();
            _input = null;
            if (success)
            {
                _indexLoaded = true;
            }
        }
        else if (!success)
        {
            // Construction failed while the input would otherwise stay open
            // (indexDivisor == -1). No caller can dispose a half-built reader,
            // so release the file handle here.
            try
            {
                _input.Dispose();
            }
            catch (Exception)
            {
                // Deliberately ignored: the exception that aborted construction
                // is the one that must reach the caller.
            }
        }
    }
}
/// <summary>
/// Reads one skip entry for <paramref name="level"/> from <paramref name="skipStream"/>
/// and adds the freq and prox pointer values it contains to that level's pointers.
/// </summary>
/// <param name="level">Index of the per-level arrays to update.</param>
/// <param name="skipStream">Stream positioned at the skip entry.</param>
/// <returns>The doc delta read from the entry.</returns>
protected internal override int ReadSkipData(int level, IndexInput skipStream)
{
    // Both encodings start with the same VInt.
    int delta = skipStream.ReadVInt();

    if (CurrentFieldStoresPayloads)
    {
        // With payloads, an odd value means a payload length follows because it
        // differs from the previous one; the doc delta is the value after an
        // unsigned shift right by one.
        bool payloadLengthFollows = (delta & 1) != 0;
        if (payloadLengthFollows)
        {
            PayloadLength_Renamed[level] = skipStream.ReadVInt();
        }
        delta = (int)((uint)delta >> 1);
    }

    FreqPointer_Renamed[level] += skipStream.ReadVInt();
    ProxPointer_Renamed[level] += skipStream.ReadVInt();

    return delta;
}
/// <summary>
/// Reads the next term from <paramref name="input"/>: the offset at which new bytes
/// start, the number of new bytes, the bytes themselves, and a field number.
/// The field name is only re-resolved when the field number changes.
/// </summary>
/// <param name="input">Stream to read the term from.</param>
/// <param name="fieldInfos">Used to resolve the field number to a field name.</param>
public void Read(IndexInput input, FieldInfos fieldInfos)
{
    this.Term = null; // invalidate cache
    NewSuffixStart = input.ReadVInt();
    int length = input.ReadVInt();
    int totalLength = NewSuffixStart + length;
    Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
    if (Bytes.Bytes.Length < totalLength)
    {
        Bytes.Grow(totalLength);
    }
    Bytes.Length = totalLength;
    input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);

    int fieldNumber = input.ReadVInt();
    if (fieldNumber != CurrentFieldNumber)
    {
        CurrentFieldNumber = fieldNumber;
        // NOTE: too much sneakiness here, seriously this is a negative vint?!
        if (CurrentFieldNumber == -1)
        {
            Field = "";
        }
        else
        {
            Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
            Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
        }
    }
    else
    {
        // The conditional must be parenthesised: '+' binds tighter than '==' and '?:',
        // so without the parentheses the whole concatenated prefix was compared to null
        // and the message collapsed to just the field name.
        Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name),
            "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " +
            (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
    }
}
/// <summary>
/// Reads doc-values metadata entries from <paramref name="meta"/> until a field
/// number of -1 is read, dispatching on each entry's type byte.
/// </summary>
/// <param name="meta">Metadata input positioned at the first field number.</param>
/// <param name="infos">Passed through to the sorted / sorted-set helpers.</param>
/// <exception cref="Exception">A field number is negative, a type is unknown, or a sorted-set entry is inconsistent.</exception>
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    // The iterator clause reads the next field number after each entry.
    for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new Exception("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
        }

        byte entryType = meta.ReadByte();

        if (entryType == Lucene45DocValuesFormat.NUMERIC)
        {
            Numerics[fieldNumber] = ReadNumericEntry(meta);
        }
        else if (entryType == Lucene45DocValuesFormat.BINARY)
        {
            Binaries[fieldNumber] = ReadBinaryEntry(meta);
        }
        else if (entryType == Lucene45DocValuesFormat.SORTED)
        {
            ReadSortedField(fieldNumber, meta, infos);
        }
        else if (entryType == Lucene45DocValuesFormat.SORTED_SET)
        {
            SortedSetEntry setEntry = ReadSortedSetEntry(meta);
            SortedSets[fieldNumber] = setEntry;

            if (setEntry.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
            {
                ReadSortedSetFieldWithAddresses(fieldNumber, meta, infos);
            }
            else if (setEntry.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
            {
                // A single-valued sorted set is followed by a SORTED entry for the same field.
                // The type byte is only read when the field number matched.
                if (meta.ReadVInt() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.SORTED)
                {
                    throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                }
                ReadSortedField(fieldNumber, meta, infos);
            }
            else
            {
                throw new Exception();
            }
        }
        else
        {
            throw new Exception("invalid type: " + entryType + ", resource=" + meta);
        }
    }
}
/// <summary>
/// Reads a sorted field, which is stored as a binary entry followed by a numeric
/// entry, each prefixed with the field number and its type byte.
/// </summary>
/// <param name="fieldNumber">Field number both sub-entries must carry.</param>
/// <param name="meta">Metadata input positioned at the binary sub-entry.</param>
/// <param name="infos">Not used by this method.</param>
/// <exception cref="Exception">A sub-entry has a different field number or an unexpected type.</exception>
private void ReadSortedField(int fieldNumber, IndexInput meta, FieldInfos infos)
{
    // sorted = binary + numeric
    // In each check the type byte is only read when the field number matched.
    if (meta.ReadVInt() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
    {
        throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    Binaries[fieldNumber] = ReadBinaryEntry(meta);

    if (meta.ReadVInt() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
    {
        throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    Ords[fieldNumber] = ReadNumericEntry(meta);
}
/// <summary>
/// Sole constructor. Reads the vectors index file into a
/// <c>CompressingStoredFieldsIndexReader</c>, then opens the vectors data file,
/// checks that its header version matches the index file's, and reads the
/// packed-ints version and chunk size.
/// </summary>
/// <param name="d">Directory both files are opened from.</param>
/// <param name="si">Segment info supplying the segment name and document count.</param>
/// <param name="segmentSuffix">Suffix used to build both file names.</param>
/// <param name="fn">Field infos stored on this reader.</param>
/// <param name="context">IO context used for both open calls.</param>
/// <param name="formatName">Prefix of the codec names checked in both headers.</param>
/// <param name="compressionMode">Supplies the decompressor.</param>
public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        // Load the index into memory
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
        {
            // The value is read only to advance the stream to the footer; it is not kept.
            indexStream.ReadVLong(); // the end of the data file
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }
        // Null out the local after disposing so the failure path below does not close it again.
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
        vectorsStream = d.OpenInput(vectorsStreamFN, context);
        string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
        int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        // NOTE(review): the message says "stored fields" although this reader handles term vectors — confirm intended wording.
        if (version != version2)
        {
            throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.FilePointer);

        packedIntsVersion = vectorsStream.ReadVInt();
        chunkSize = vectorsStream.ReadVInt();
        decompressor = compressionMode.NewDecompressor();
        this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

        success = true;
    }
    finally
    {
        if (!success)
        {
            // On failure, close this reader and the index stream (if still open).
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
/// <summary>
/// Reads one block of VInts into <c>buffer</c>. The first value read selects the
/// block length: <c>baseBlockSize</c> values in total when it is at most 3,
/// otherwise <c>2 * baseBlockSize</c>.
/// </summary>
/// <returns>The number of values placed in <c>buffer</c>, including the first.</returns>
public int ReadBlock()
{
    buffer[0] = input.ReadVInt();

    // Number of values that follow the first one.
    int count;
    if (buffer[0] <= 3)
    {
        count = baseBlockSize - 1;
    }
    else
    {
        count = 2 * baseBlockSize - 1;
    }
    Debug.Assert(buffer.Length >= count, "buffer.length=" + buffer.Length + " count=" + count);

    int filled = 0;
    while (filled < count)
    {
        buffer[filled + 1] = input.ReadVInt();
        filled++;
    }

    return 1 + count;
}
/// <summary>
/// Opens the terms file for the segment described by <paramref name="state"/>,
/// validates it, initialises <paramref name="postingsReader"/> from it, and builds
/// one <c>TermsReader</c> per field listed in the file's directory.
/// </summary>
/// <param name="state">Supplies the directory, segment info, suffix, context and field infos.</param>
/// <param name="postingsReader">Postings reader stored on this instance and initialised from the terms file.</param>
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    IndexInput termsInput = state.Directory.OpenInput(termsFileName, state.Context);

    bool success = false;
    try
    {
        version = ReadHeader(termsInput);
        if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(termsInput);
        }
        this.postingsReader.Init(termsInput);
        SeekDir(termsInput);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = termsInput.ReadVInt();
        for (int remaining = numFields; remaining > 0; remaining--)
        {
            int fieldNumber = termsInput.ReadVInt();
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
            long numTerms = termsInput.ReadVLong();

            // sumTotalTermFreq is not stored for DOCS_ONLY fields; -1 is used instead.
            long sumTotalTermFreq;
            if (fieldInfo.FieldIndexOptions == IndexOptions.DOCS_ONLY)
            {
                sumTotalTermFreq = -1;
            }
            else
            {
                sumTotalTermFreq = termsInput.ReadVLong();
            }

            long sumDocFreq = termsInput.ReadVLong();
            int docCount = termsInput.ReadVInt();
            int longsSize = termsInput.ReadVInt();

            TermsReader current = new TermsReader(this, fieldInfo, termsInput, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);

            // LUCENENET NOTE: This simulates a put operation in Java,
            // getting the prior value first before setting it.
            TermsReader previous;
            fields.TryGetValue(fieldInfo.Name, out previous);
            fields[fieldInfo.Name] = current;

            CheckFieldSummary(state.SegmentInfo, termsInput, current, previous);
        }
        success = true;
    }
    finally
    {
        // The input is closed on both paths; the failure path uses the
        // exception-handling variant of the close helper.
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(termsInput);
        }
        else
        {
            IOUtils.Close(termsInput);
        }
    }
}
/// <summary>
/// Loads document <paramref name="n"/>, adding each stored field to the result
/// according to what <paramref name="fieldSelector"/> returns for that field.
/// </summary>
/// <param name="n">Document number passed to <c>SeekIndex</c>.</param>
/// <param name="fieldSelector">Per-field load policy; when <c>null</c>, every field is loaded.</param>
/// <returns>The document holding the fields that were accepted.</returns>
public /*internal*/ Document.Document Doc(int n, FieldSelector fieldSelector)
{
    SeekIndex(n);
    fieldsStream.Seek(indexStream.ReadLong());

    var document = new Document.Document();
    int fieldCount = fieldsStream.ReadVInt();

    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(fieldsStream.ReadVInt());

        FieldSelectorResult acceptField;
        if (fieldSelector == null)
        {
            acceptField = FieldSelectorResult.LOAD;
        }
        else
        {
            acceptField = fieldSelector.Accept(fi.name);
        }

        byte bits = fieldsStream.ReadByte();
        System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        System.Diagnostics.Debug.Assert(
            (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
            "compressed fields are only allowed in indexes of version <= 2.9");
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
        bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;

        //TODO: Find an alternative approach here if this list continues to grow beyond the
        //list of 5 or 6 currently here. See Lucene 762 for discussion
        if (acceptField.Equals(FieldSelectorResult.LOAD))
        {
            AddField(document, fi, binary, compressed, tokenize);
        }
        else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
        {
            AddField(document, fi, binary, compressed, tokenize);
            break; // no further fields are examined
        }
        else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
        {
            AddFieldLazy(document, fi, binary, compressed, tokenize);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE))
        {
            // The value returned by AddFieldSize is handed to SkipField.
            SkipField(binary, compressed, AddFieldSize(document, fi, binary, compressed));
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
        {
            AddFieldSize(document, fi, binary, compressed);
            break; // no further fields are examined
        }
        else
        {
            SkipField(binary, compressed);
        }
    }

    return document;
}
/// <summary>
/// Advances to the next document, seeks the posting input to that document's
/// offset, and reads its frequency.
/// </summary>
/// <returns>The next doc id, or <c>DocIdSetIterator.NO_MORE_DOCS</c> once <c>upto</c> is reached.</returns>
public override int NextDoc()
{
    docIt++;
    if (docIt >= upto)
    {
        return DocIdSetIterator.NO_MORE_DOCS;
    }

    postingInput.Seek(offsets[docIt]);
    currFreq = postingInput.ReadVInt();

    // reset variables used in nextPosition
    pos = 0;
    endOffset = 0;

    return docs[docIt];
}
/// <summary>
/// Reads all field entries from <paramref name="input"/> and registers each via
/// <c>AddInternal</c>. A negative first VInt is taken as the format marker;
/// otherwise the format is <c>FORMAT_PRE</c> and that VInt is the entry count.
/// </summary>
/// <param name="input">Stream to read from.</param>
/// <param name="fileName">File name used in error messages.</param>
/// <exception cref="CorruptIndexException">The format is unrecognised or bytes remain after the last entry.</exception>
private void Read(IndexInput input, String fileName)
{
    int firstInt = input.ReadVInt();

    // This is a real format only when the leading value is negative.
    format = firstInt < 0 ? firstInt : FORMAT_PRE;

    if (!(format == FORMAT_PRE || format == FORMAT_START))
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    // In the pre-format layout the first value already was the size.
    int size = format == FORMAT_PRE ? firstInt : input.ReadVInt();

    int entry = 0;
    while (entry < size)
    {
        String name = StringHelper.Intern(input.ReadString());
        byte bits = input.ReadByte();

        AddInternal(
            name,
            (bits & IS_INDEXED) != 0,
            (bits & STORE_TERMVECTOR) != 0,
            (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0,
            (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0,
            (bits & OMIT_NORMS) != 0,
            (bits & STORE_PAYLOADS) != 0,
            (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0);

        entry++;
    }

    // The whole file must have been consumed.
    if (input.FilePointer != input.Length())
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
    }
}
/// <summary>
/// Reads a <c>BinaryEntry</c> from <paramref name="meta"/>: the common header
/// fields first, then the extra fields required by the entry's format.
/// </summary>
/// <param name="meta">Metadata input positioned at the entry.</param>
/// <returns>The populated entry.</returns>
/// <exception cref="Exception">The format value is not one of the three known binary formats.</exception>
internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    BinaryEntry result = new BinaryEntry();

    // Fields common to every format.
    result.Format = meta.ReadVInt();
    result.MissingOffset = meta.ReadLong();
    result.MinLength = meta.ReadVInt();
    result.MaxLength = meta.ReadVInt();
    result.Count = meta.ReadVLong();
    result.Offset = meta.ReadLong();

    switch (result.Format)
    {
        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            result.AddressesOffset = meta.ReadLong();
            result.PackedIntsVersion = meta.ReadVInt();
            result.BlockSize = meta.ReadVInt();
            break;

        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            result.AddressInterval = meta.ReadVInt();
            result.AddressesOffset = meta.ReadLong();
            result.PackedIntsVersion = meta.ReadVInt();
            result.BlockSize = meta.ReadVInt();
            break;

        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            // No additional fields are read for this format.
            break;

        default:
            throw new Exception("Unknown format: " + result.Format + ", input=" + meta);
    }

    return result;
}
/// <summary>
/// Advances to the next term and reads it into <c>termBuffer</c>: a start offset,
/// a suffix length, then the suffix bytes written at that offset.
/// </summary>
/// <returns><c>termBuffer</c>, or <c>null</c> once <c>OuterInstance.NumValues</c> is reached.</returns>
private BytesRef DoNext()
{
    currentOrd++;
    if (currentOrd >= OuterInstance.NumValues)
    {
        return null;
    }

    int prefixLength = Input.ReadVInt();
    int suffixLength = Input.ReadVInt();
    Input.ReadBytes(termBuffer.Bytes, prefixLength, suffixLength);
    termBuffer.Length = prefixLength + suffixLength;

    return termBuffer;
}
/// <summary>
/// Returns the next position for the current document, first performing any
/// pending seek and skipping positions left over from earlier documents.
/// </summary>
/// <returns>The running position after adding the next VInt read from <c>ProxIn</c>.</returns>
public override int NextPosition()
{
    // Perform the deferred seek, if one is pending.
    if (LazyProxPointer != -1)
    {
        ProxIn.Seek(LazyProxPointer);
        LazyProxPointer = -1;
    }

    // scan over any docs that were iterated without their positions
    if (PosPendingCount > Freq_Renamed)
    {
        Position = 0;
        while (PosPendingCount != Freq_Renamed)
        {
            // A byte with the high bit clear is counted as one skipped position.
            bool highBitClear = (ProxIn.ReadByte() & 0x80) == 0;
            if (highBitClear)
            {
                PosPendingCount--;
            }
        }
    }

    Position += ProxIn.ReadVInt();
    PosPendingCount--;

    Debug.Assert(PosPendingCount >= 0, "nextPosition() was called too many times (more than freq() times) posPendingCount=" + PosPendingCount);

    return Position;
}
/// <summary>
/// Fills every slot of <c>buffer</c>, in order, with a VInt read from the input.
/// </summary>
public void ReadBlock()
{
    int slot = 0;
    while (slot < buffer.Length)
    {
        buffer[slot] = @in.ReadVInt();
        slot++;
    }
}
/// <summary>
/// Scans forward from <c>Ord</c> to <c>Limit</c>, decoding doc deltas (and
/// frequencies unless term frequencies are omitted), and stops at the first
/// document that <c>LiveDocs</c> reports as live.
/// </summary>
/// <returns>The doc id of the next live document, or <c>NO_MORE_DOCS</c> if none is found before the limit.</returns>
protected internal override int NextUnreadDoc()
{
    // Work on locals inside the loop.
    IndexInput freqIn = this.FreqIn;
    Bits liveDocs = this.LiveDocs;
    bool omitTF = IndexOmitsTF;
    int end = Limit;

    int doc = Accum;
    int freq = 1;

    int cursor = Ord;
    while (cursor < end)
    {
        int code = freqIn.ReadVInt();
        if (omitTF)
        {
            doc += code;
        }
        else
        {
            doc += (int)((uint)code >> 1); // shift off low bit
            freq = ReadFreq(freqIn, code);
        }
        cursor++;

        if (liveDocs.Get(doc))
        {
            Freq_Renamed = freq;
            Ord = cursor;
            Accum = doc;
            return doc;
        }
    }

    // No live document before the limit: store the final state.
    Ord = Limit;
    Freq_Renamed = freq;
    Accum = doc;
    return NO_MORE_DOCS;
}
/// <summary>
/// Reads the id stored in the segment's id file and returns the entry of
/// <c>State</c> registered under that id.
/// </summary>
/// <param name="readState">Supplies the directory, segment name, suffix and IO context.</param>
/// <returns>The value of <c>State[id]</c>.</returns>
public override FieldsProducer FieldsProducer(SegmentReadState readState)
{
    // Load our ID:
    string idFileName = IndexFileNames.SegmentFileName(readState.SegmentInfo.Name, readState.SegmentSuffix, ID_EXTENSION);
    IndexInput idInput = readState.Directory.OpenInput(idFileName, readState.Context);

    int id;
    bool success = false;
    try
    {
        CodecUtil.CheckHeader(idInput, RAM_ONLY_NAME, VERSION_START, VERSION_LATEST);
        id = idInput.ReadVInt();
        success = true;
    }
    finally
    {
        // The input is closed on both paths; the failure path uses the
        // exception-handling variant of the close helper.
        if (success)
        {
            IOUtils.Close(idInput);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(idInput);
        }
    }

    lock (State)
    {
        return State[id];
    }
}
/// <summary>
/// Reads one skip entry for <paramref name="level"/>: the doc delta (with an
/// optional payload length), then the freq, doc and pos index entries that apply
/// to the current index options, and finally the payload pointer delta.
/// </summary>
/// <param name="level">Index of the per-level arrays to update.</param>
/// <param name="skipStream">Stream positioned at the skip entry.</param>
/// <returns>The doc delta read from the entry.</returns>
protected override int ReadSkipData(int level, IndexInput skipStream)
{
    Debug.Assert(_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !_currentFieldStoresPayloads);

    // Both encodings start with the same VInt.
    int delta = skipStream.ReadVInt();
    if (_currentFieldStoresPayloads)
    {
        // With payloads, an odd value means a payload length follows because it
        // differs from the previous one; the doc delta is the value after an
        // unsigned shift right by one.
        if ((delta & 1) != 0)
        {
            _payloadLength[level] = skipStream.ReadVInt();
        }
        delta = (int)((uint)delta >> 1);
    }

    if (_indexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
    {
        _freqIndex[level].Read(skipStream, false);
    }
    _docIndex[level].Read(skipStream, false);

    // Position and payload data are only read when positions are indexed.
    if (_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        _posIndex[level].Read(skipStream, false);
        if (_currentFieldStoresPayloads)
        {
            _payloadPointer[level] += skipStream.ReadVInt();
        }
    }

    return delta;
}
/// <summary>
/// Validates the header of <paramref name="termsIn"/>, reads the maximum
/// position count, initialises the wrapped postings reader, and, when a summary
/// file applies, loads the field-number to longs-size map from it.
/// </summary>
/// <param name="termsIn">Terms input positioned at the codec header.</param>
public override void Init(IndexInput termsIn)
{
    _version = CodecUtil.CheckHeader(termsIn, PulsingPostingsWriter.CODEC,
        PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_CURRENT);
    _maxPositions = termsIn.ReadVInt();
    _wrappedPostingsReader.Init(termsIn);

    // The summary file is skipped when the wrapped reader is itself a
    // PulsingPostingsReader or the version is below VERSION_META_ARRAY.
    bool readsSummary = !(_wrappedPostingsReader is PulsingPostingsReader)
                        && _version >= PulsingPostingsWriter.VERSION_META_ARRAY;
    if (!readsSummary)
    {
        _fields = null;
        return;
    }

    _fields = new SortedDictionary<int, int>();
    var summaryFileName = IndexFileNames.SegmentFileName(_segmentState.SegmentInfo.Name,
        _segmentState.SegmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);

    IndexInput summaryInput = null;
    try
    {
        summaryInput = _segmentState.Directory.OpenInput(summaryFileName, _segmentState.Context);
        CodecUtil.CheckHeader(summaryInput, PulsingPostingsWriter.CODEC, _version, PulsingPostingsWriter.VERSION_CURRENT);

        var pairCount = summaryInput.ReadVInt();
        for (var remaining = pairCount; remaining > 0; remaining--)
        {
            var fieldNum = summaryInput.ReadVInt();
            var longsSize = summaryInput.ReadVInt();
            _fields.Add(fieldNum, longsSize);
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(summaryInput);
    }
}
/// <summary>
/// Reads one field's term statistics from the input and then loads its FST.
/// </summary>
/// <param name="fieldInfos">Used to resolve the field number read from the input.</param>
/// <param name="in">Input positioned at the field number.</param>
/// <param name="termCount">Stored on this instance as-is.</param>
public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount)
{
    this.termCount = termCount;
    field = fieldInfos.FieldInfo(@in.ReadVInt());

    // sumTotalTermFreq is not read for DOCS_ONLY fields; -1 is used instead.
    sumTotalTermFreq = field.FieldIndexOptions != IndexOptions.DOCS_ONLY
        ? @in.ReadVLong()
        : -1;

    sumDocFreq = @in.ReadVLong();
    docCount = @in.ReadVInt();

    fst = new FST<BytesRef>(@in, outputs);
}
/// <summary>
/// Reads the next position delta from <c>proxStream</c>. When the current field
/// stores payloads, also reads an updated payload length if one follows and
/// marks the payload as needing to be loaded.
/// </summary>
/// <returns>The position delta.</returns>
private int ReadDeltaPosition()
{
    int raw = proxStream.ReadVInt();
    if (!currentFieldStoresPayloads)
    {
        return raw;
    }

    // With payloads the position delta is shifted one bit to the left;
    // a set low bit means the current payload length has to be read.
    if ((raw & 1) != 0)
    {
        payloadLength = proxStream.ReadVInt();
    }

    int delta = Number.URShift(raw, 1);
    needToLoadPayload = true;
    return delta;
}
/// <summary>
/// Reads the next term from the input, if any bytes remain. The leading VInt
/// holds the prefix length in its upper bits; a set low bit means a new field
/// name follows.
/// </summary>
/// <returns><c>true</c> if a term was read; <c>false</c> when the file pointer has reached the input length.</returns>
public bool MoveNext()
{
    if (input.FilePointer >= input.Length())
    {
        return false;
    }

    int code = input.ReadVInt();
    bool fieldFollows = (code & 1) != 0;
    if (fieldFollows)
    {
        field = input.ReadString();
    }

    int prefix = Number.URShift(code, 1);
    int suffix = input.ReadVInt();
    int total = prefix + suffix;

    // The suffix bytes are written at offset prefix.
    bytes.Grow(total);
    input.ReadBytes(bytes.Bytes, prefix, suffix);
    bytes.Length = total;

    term.Set(field, bytes);
    return true;
}
/// <summary>
/// Reads a <c>NumericEntry</c> from <paramref name="meta"/>: the common header
/// fields first, then the extra fields required by the entry's format.
/// </summary>
/// <param name="meta">Metadata input positioned at the entry.</param>
/// <returns>The populated entry.</returns>
/// <exception cref="Exception">The format is unknown, or a TABLE_COMPRESSED entry exceeds its count or table-size limits.</exception>
internal static NumericEntry ReadNumericEntry(IndexInput meta)
{
    NumericEntry result = new NumericEntry();

    // Fields common to every format.
    result.Format = meta.ReadVInt();
    result.MissingOffset = meta.ReadLong();
    result.PackedIntsVersion = meta.ReadVInt();
    result.Offset = meta.ReadLong();
    result.Count = meta.ReadVLong();
    result.BlockSize = meta.ReadVInt();

    switch (result.Format)
    {
        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            // No additional fields are read for this format.
            break;

        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            result.MinValue = meta.ReadLong();
            result.Gcd = meta.ReadLong();
            break;

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            if (result.Count > int.MaxValue)
            {
                throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
            }

            int tableSize = meta.ReadVInt();
            if (tableSize > 256)
            {
                throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
            }

            result.Table = new long[tableSize];
            int slot = 0;
            while (slot < tableSize)
            {
                result.Table[slot] = meta.ReadLong();
                ++slot;
            }
            break;

        default:
            throw new Exception("Unknown format: " + result.Format + ", input=" + meta);
    }

    return result;
}
/// <summary>
/// Advances the enumeration to the next term.
/// </summary>
/// <returns><c>true</c> if a term was read; <c>false</c> once the last entry has been passed.</returns>
public override bool Next()
{
    // Evaluate the end check first, then save the current term as the previous one.
    bool exhausted = position++ >= size - 1;
    prevBuffer.Set(termBuffer);

    if (exhausted)
    {
        termBuffer.Reset();
        return false;
    }

    termBuffer.Read(input, fieldInfos);

    termInfo.docFreq = input.ReadVInt();      // doc freq
    termInfo.freqPointer += input.ReadVLong(); // freq pointer, added to the running value
    termInfo.proxPointer += input.ReadVLong(); // prox pointer, added to the running value

    if (format != -1)
    {
        if (termInfo.docFreq >= skipInterval)
        {
            termInfo.skipOffset = input.ReadVInt();
        }
    }
    else if (!isIndex && termInfo.docFreq > formatM1SkipInterval)
    {
        // just read skipOffset in order to increment file pointer;
        // value is never used since skipTo is switched off
        termInfo.skipOffset = input.ReadVInt();
    }

    // An index pointer is only read when isIndex is set.
    if (isIndex)
    {
        indexPointer += input.ReadVLong();
    }

    return true;
}
/// <summary>
/// Decodes a term frequency: a set low bit in <paramref name="code"/> means the
/// frequency is 1; otherwise it is read from <paramref name="freqIn"/>.
/// </summary>
/// <param name="freqIn">Stream holding the frequency; only read when the low bit is clear.</param>
/// <param name="code">Value whose low bit is tested.</param>
/// <returns>The frequency.</returns>
internal int ReadFreq(IndexInput freqIn, int code)
{
    bool lowBitSet = (code & 1) != 0;
    return lowBitSet ? 1 : freqIn.ReadVInt();
}
/// <summary>
/// Opens the compound file <paramref name="name"/> and reads its directory of
/// (offset, id) pairs into <c>entries</c>. Each entry's length is the gap to the
/// next entry's offset; the last entry's length runs to the end of the stream.
/// </summary>
/// <param name="dir">Directory the file is opened from.</param>
/// <param name="name">Name of the compound file.</param>
/// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c> and stored on this instance.</param>
public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        stream = dir.OpenInput(name, readBufferSize);

        // read the directory and init files
        int count = stream.ReadVInt();
        FileEntry previous = null;
        for (int remaining = count; remaining > 0; remaining--)
        {
            long offset = stream.ReadLong();
            System.String id = stream.ReadString();

            // set length of the previous entry
            if (previous != null)
            {
                previous.length = offset - previous.offset;
            }

            FileEntry current = new FileEntry();
            current.offset = offset;
            entries[id] = current;
            previous = current;
        }

        // set the length of the final entry
        if (previous != null)
        {
            previous.length = stream.Length() - previous.offset;
        }

        success = true;
    }
    finally
    {
        if (!success && (stream != null))
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException)
            {
                // An IOException from Close is ignored on the failure path.
            }
        }
    }
}
/// <summary>
/// Validates the header of <paramref name="termsIn"/>, reads the maximum
/// position count, initialises the wrapped postings reader, and, when a summary
/// file applies, loads the field-number to longs-size map from it.
/// </summary>
/// <param name="termsIn">Terms input positioned at the codec header.</param>
public override void Init(IndexInput termsIn)
{
    _version = CodecUtil.CheckHeader(termsIn, PulsingPostingsWriter.CODEC,
        PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_CURRENT);
    _maxPositions = termsIn.ReadVInt();
    _wrappedPostingsReader.Init(termsIn);

    // The summary file is skipped when the wrapped reader is itself a
    // PulsingPostingsReader or the version is below VERSION_META_ARRAY.
    bool readsSummary = !(_wrappedPostingsReader is PulsingPostingsReader)
                        && _version >= PulsingPostingsWriter.VERSION_META_ARRAY;
    if (!readsSummary)
    {
        _fields = null;
        return;
    }

    _fields = new SortedDictionary<int, int>();
    var summaryFileName = IndexFileNames.SegmentFileName(_segmentState.SegmentInfo.Name,
        _segmentState.SegmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);

    IndexInput summaryInput = null;
    try
    {
        summaryInput = _segmentState.Directory.OpenInput(summaryFileName, _segmentState.Context);
        CodecUtil.CheckHeader(summaryInput, PulsingPostingsWriter.CODEC, _version, PulsingPostingsWriter.VERSION_CURRENT);

        var pairCount = summaryInput.ReadVInt();
        for (var remaining = pairCount; remaining > 0; remaining--)
        {
            var fieldNum = summaryInput.ReadVInt();
            var longsSize = summaryInput.ReadVInt();
            _fields.Add(fieldNum, longsSize);
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(summaryInput);
    }
}
/// <summary>
/// Sole constructor. Reads the fields index file into a
/// <c>CompressingStoredFieldsIndexReader</c>, then opens the fields data file,
/// validates its length (for checksummed versions) and header version, and reads
/// the chunk size and packed-ints version.
/// </summary>
/// <param name="d">Directory both files are opened from.</param>
/// <param name="si">Segment info supplying the segment name and document count.</param>
/// <param name="segmentSuffix">Suffix used to build both file names.</param>
/// <param name="fn">Field infos stored on this reader.</param>
/// <param name="context">IO context used for both open calls.</param>
/// <param name="formatName">Prefix of the codec names checked in both headers.</param>
/// <param name="compressionMode">Supplies the decompressor.</param>
/// <exception cref="CorruptIndexException">The data file length does not match the recorded max pointer, or the two files have different versions.</exception>
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.CompressionMode_Renamed = compressionMode;
    string segment = si.Name;
    bool success = false;
    FieldInfos = fn;
    NumDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
        // Load the index into memory
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
        Version_Renamed = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        IndexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        // -1 until a real value is known: read from the index file for checksummed
        // versions, otherwise taken from the data file's length further below.
        long maxPointer = -1;

        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            maxPointer = indexStream.ReadVLong();
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }
        // Null out the local after disposing so the failure path below does not close it again.
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        FieldsStream = d.OpenInput(fieldsStreamFN, context);
        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            // The recorded max pointer plus the footer must account for the whole data file.
            if (maxPointer + CodecUtil.FooterLength() != FieldsStream.Length())
            {
                throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + FieldsStream.Length());
            }
        }
        else
        {
            maxPointer = FieldsStream.Length();
        }
        this.MaxPointer = maxPointer;
        string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
        int fieldsVersion = CodecUtil.CheckHeader(FieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        if (Version_Renamed != fieldsVersion)
        {
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + Version_Renamed + " != " + fieldsVersion);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);

        // The chunk size is only read for VERSION_BIG_CHUNKS and later; -1 is stored otherwise.
        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
        {
            ChunkSize_Renamed = FieldsStream.ReadVInt();
        }
        else
        {
            ChunkSize_Renamed = -1;
        }
        PackedIntsVersion = FieldsStream.ReadVInt();
        Decompressor = compressionMode.NewDecompressor();
        this.Bytes = new BytesRef();

        success = true;
    }
    finally
    {
        if (!success)
        {
            // On failure, close this reader and the index stream (if still open).
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
/// <summary>
/// Opens the terms index file of a segment, validates its header values and reads the
/// per-field directory into <c>_fields</c>.
/// </summary>
/// <param name="dir">Directory the terms index file is opened from.</param>
/// <param name="fieldInfos">Used to resolve the field numbers stored in the directory.</param>
/// <param name="segment">Segment name used to build the file name.</param>
/// <param name="indexDivisor">
/// Asserted to be -1 or positive. When positive, the input is released before the
/// constructor returns and, on success, the index is flagged as loaded.
/// </param>
/// <param name="termComp">Term comparer kept by this reader.</param>
/// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
/// <param name="context">I/O context passed to <c>dir.OpenInput</c>.</param>
/// <exception cref="CorruptIndexException">
/// Thrown when the index interval, a count, a file pointer or a field number read from
/// the file is invalid, or when a field occurs twice in the directory.
/// </exception>
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
    IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
{
    _termComp = termComp;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        context);

    var success = false;

    try
    {
        _version = ReadHeader(_input);

        if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(_input);
        }

        indexInterval = _input.ReadInt();
        if (indexInterval < 1)
        {
            throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                indexInterval, _input));
        }

        _indexDivisor = indexDivisor;

        if (indexDivisor < 0)
        {
            _totalIndexInterval = indexInterval;
        }
        else
        {
            // In case terms index gets loaded, later, on demand
            _totalIndexInterval = indexInterval*indexDivisor;
        }

        Debug.Assert(_totalIndexInterval > 0);

        SeekDir(_input, _dirOffset);

        // Read directory
        int numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}",
                numFields, _input));
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = _input.ReadVInt();
            int numIndexTerms = _input.ReadVInt();
            if (numIndexTerms < 0)
            {
                throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                    numIndexTerms, _input));
            }

            long termsStart = _input.ReadVLong();
            long indexStart = _input.ReadVLong();
            long packedIndexStart = _input.ReadVLong();
            long packedOffsetsStart = _input.ReadVLong();

            if (packedIndexStart < indexStart)
            {
                throw new CorruptIndexException(
                    String.Format(
                        "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                        packedIndexStart, indexStart, numIndexTerms, _input));
            }

            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

            // Report an unresolvable field number as corruption. Previously a null
            // fieldInfo reached _fields.Add, whose ArgumentNullException was caught by
            // the ArgumentException handler, which then dereferenced fieldInfo.Name.
            if (fieldInfo == null)
            {
                throw new CorruptIndexException(String.Format("Invalid field number: {0}, Resource: {1}",
                    field, _input));
            }

            // Test for the duplicate explicitly instead of catching ArgumentException, so
            // that an argument error raised while building FieldIndexData is not
            // mislabelled as a duplicate field.
            if (_fields.ContainsKey(fieldInfo))
            {
                throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                    fieldInfo.Name, _input));
            }

            _fields.Add(fieldInfo,
                new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                    packedOffsetsStart, this));
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(_input);
        }

        if (indexDivisor > 0)
        {
            if (success)
            {
                // Only dispose here on success: on failure the input was already closed
                // quietly above, and a second Dispose inside finally could only throw
                // and hide the exception that is propagating.
                _input.Dispose();
                _indexLoaded = true;
            }

            _input = null;
            _termBytesReader = _termBytes.Freeze(true);
        }
    }
}
/// <summary>
/// Reads one field's header from <paramref name="in"/>: the field number, the term
/// statistics and finally the <c>FST&lt;BytesRef&gt;</c> that follows them.
/// </summary>
/// <param name="fieldInfos">Used to resolve the field number read from the stream.</param>
/// <param name="in">Input positioned at the start of the field's data.</param>
/// <param name="termCount">Term count stored on this reader as given.</param>
public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount)
{
    this.termCount = termCount;

    // The field number is the first value in the stream.
    field = fieldInfos.FieldInfo(@in.ReadVInt());

    // For DOCS_ONLY fields nothing is read here and -1 is stored instead.
    sumTotalTermFreq = field.FieldIndexOptions == IndexOptions.DOCS_ONLY
        ? -1
        : @in.ReadVLong();

    sumDocFreq = @in.ReadVLong();
    docCount = @in.ReadVInt();
    fst = new FST<BytesRef>(@in, outputs);
}
/// <summary>
/// Sole constructor. Opens the terms file (and, unless <paramref name="indexDivisor"/>
/// is -1, the terms index file), validates their headers and reads the per-field
/// details into <c>Fields</c>.
/// </summary>
/// <param name="dir">Directory both files are opened from.</param>
/// <param name="fieldInfos">Used to resolve the field numbers stored in the terms file.</param>
/// <param name="info">Segment info; supplies the segment name and the document count used for validation.</param>
/// <param name="postingsReader">Postings reader, initialised from the terms file.</param>
/// <param name="ioContext">I/O context passed to <c>dir.OpenInput</c>.</param>
/// <param name="segmentSuffix">Segment suffix used to build the file names.</param>
/// <param name="indexDivisor">When -1 the terms index file is not opened at all.</param>
/// <exception cref="CorruptIndexException">
/// Thrown when the two files disagree on version, when a count, length or field number
/// read from the terms file is invalid, or when a field occurs twice.
/// </exception>
public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext ioContext, string segmentSuffix, int indexDivisor)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }

    this.PostingsReader = postingsReader;

    this.Segment = info.Name;
    @in = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION), ioContext);

    bool success = false;
    IndexInput indexIn = null;

    try
    {
        Version = ReadHeader(@in);
        if (indexDivisor != -1)
        {
            indexIn = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), ioContext);
            int indexVersion = ReadIndexHeader(indexIn);
            if (indexVersion != Version)
            {
                throw new CorruptIndexException("mixmatched version files: " + @in + "=" + Version + "," + indexIn + "=" + indexVersion);
            }
        }

        // verify
        if (indexIn != null && Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(indexIn);
        }

        // Have PostingsReader init itself
        postingsReader.Init(@in);

        // Read per-field details
        SeekDir(@in, DirOffset);
        if (indexDivisor != -1)
        {
            SeekDir(indexIn, IndexDirOffset);
        }

        int numFields = @in.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + @in + ")");
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = @in.ReadVInt();
            long numTerms = @in.ReadVLong();
            Debug.Assert(numTerms >= 0);

            int numBytes = @in.ReadVInt();
            // A negative length can only come from a damaged file; report it as such
            // instead of letting the array allocation below fail.
            if (numBytes < 0)
            {
                throw new CorruptIndexException("invalid rootCode for field number: " + field + ", numBytes=" + numBytes + " (resource=" + @in + ")");
            }
            BytesRef rootCode = new BytesRef(new byte[numBytes]);
            @in.ReadBytes(rootCode.Bytes, 0, numBytes);
            rootCode.Length = numBytes;

            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
            // Formerly only a Debug.Assert, so release builds dereferenced null on the
            // next line when the file named a field number that cannot be resolved.
            if (fieldInfo == null)
            {
                throw new CorruptIndexException("invalid field number: " + field + " (resource=" + @in + ")");
            }

            // For DOCS_ONLY fields no value is read and -1 is used instead.
            long sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
            long sumDocFreq = @in.ReadVLong();
            int docCount = @in.ReadVInt();
            int longsSize = Version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? @in.ReadVInt() : 0;

            if (docCount < 0 || docCount > info.DocCount) // #docs with field must be <= #docs
            {
                throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + @in + ")");
            }
            if (sumDocFreq < docCount) // #postings must be >= #docs with field
            {
                throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + @in + ")");
            }
            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) // #positions must be >= #postings
            {
                throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + @in + ")");
            }

            // The index start pointer is only read when the terms index file is open.
            long indexStartFP = indexDivisor != -1 ? indexIn.ReadVLong() : 0;

            if (Fields.ContainsKey(fieldInfo.Name))
            {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + @in + ")");
            }
            Fields[fieldInfo.Name] = new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn);
        }

        if (indexDivisor != -1)
        {
            indexIn.Dispose();
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            // this.close() will close in:
            IOUtils.CloseWhileHandlingException(indexIn, this);
        }
    }
}
/// <summary>
/// Reads one skip entry for <paramref name="level"/> from <paramref name="skipStream"/>,
/// updating the per-level index (and, when stored, payload) state along the way.
/// </summary>
/// <param name="level">Skip level whose per-level state is updated.</param>
/// <param name="skipStream">Stream the skip entry is read from.</param>
/// <returns>The doc delta decoded from the entry.</returns>
protected override int ReadSkipData(int level, IndexInput skipStream)
{
    Debug.Assert(_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !_currentFieldStoresPayloads);

    // Every entry starts with one VInt, whether or not the field stores payloads.
    int docDelta = skipStream.ReadVInt();

    if (_currentFieldStoresPayloads)
    {
        // With payloads, an odd value means the payload length differs from the
        // previous one and follows immediately; the remaining bits hold the delta.
        bool payloadLengthChanged = (docDelta & 1) != 0;
        if (payloadLengthChanged)
        {
            _payloadLength[level] = skipStream.ReadVInt();
        }
        docDelta = (int) ((uint) docDelta >> 1);
    }

    if (_indexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
    {
        _freqIndex[level].Read(skipStream, false);
    }
    _docIndex[level].Read(skipStream, false);

    // Position (and payload pointer) data is only read when positions are indexed.
    if (_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        _posIndex[level].Read(skipStream, false);
        if (_currentFieldStoresPayloads)
        {
            _payloadPointer[level] += skipStream.ReadVInt();
        }
    }

    return docDelta;
}
/// <summary>
/// Returns the frequency that goes with <paramref name="code"/>.
/// </summary>
/// <param name="freqIn">Stream the frequency is read from when it is not implied by <paramref name="code"/>.</param>
/// <param name="code">Value whose low bit, when set, means the frequency is one.</param>
/// <returns>1 when the low bit of <paramref name="code"/> is set; otherwise the next VInt from <paramref name="freqIn"/>.</returns>
internal int ReadFreq(IndexInput freqIn, int code)
{
    // Low bit clear: the frequency is read from the stream.
    // Low bit set: the frequency is one and the stream is left untouched.
    return (code & 1) == 0 ? freqIn.ReadVInt() : 1;
}
/// <summary>
/// Opens the terms file of a segment, lets the postings reader initialise itself from
/// it and reads the per-field details into <c>_fields</c>.
/// </summary>
/// <param name="indexReader">Terms index reader; stored once the terms file has been read successfully.</param>
/// <param name="dir">Directory the terms file is opened from.</param>
/// <param name="fieldInfos">Used to resolve the field numbers stored in the terms file.</param>
/// <param name="info">Segment info; supplies the segment name and the document count used for validation.</param>
/// <param name="postingsReader">Postings reader, initialised from the terms file.</param>
/// <param name="context">I/O context passed to <c>dir.OpenInput</c>.</param>
/// <param name="segmentSuffix">Segment suffix used to build the file name.</param>
/// <exception cref="CorruptIndexException">
/// Thrown when a count, statistic or field number read from the file is invalid, or
/// when a field occurs twice.
/// </exception>
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info,
    PostingsReaderBase postingsReader, IOContext context, String segmentSuffix)
{
    _postingsReader = postingsReader;

    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
        context);

    var success = false;
    try
    {
        _version = ReadHeader(_input);

        // Have PostingsReader init itself
        postingsReader.Init(_input);

        // Read per-field details
        SeekDir(_input, _dirOffset);

        int numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}",
                numFields, _input));
        }

        for (var i = 0; i < numFields; i++)
        {
            var field = _input.ReadVInt();
            var numTerms = _input.ReadVLong();

            Debug.Assert(numTerms >= 0);

            var termsStartPointer = _input.ReadVLong();
            var fieldInfo = fieldInfos.FieldInfo(field);

            // A field number that cannot be resolved means the file is damaged; report
            // that instead of failing with a NullReferenceException on the next line.
            if (fieldInfo == null)
            {
                throw new CorruptIndexException(String.Format("Invalid field number: {0}, Resource: {1}",
                    field, _input));
            }

            // For DOCS_ONLY fields no value is read and -1 is used instead.
            var sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY
                ? -1
                : _input.ReadVLong();
            var sumDocFreq = _input.ReadVLong();
            var docCount = _input.ReadVInt();
            var longsSize = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0;

            if (docCount < 0 || docCount > info.DocCount)
            {
                // #docs with field must be <= #docs
                throw new CorruptIndexException(
                    String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount,
                        _input));
            }

            if (sumDocFreq < docCount)
            {
                // #postings must be >= #docs with field
                throw new CorruptIndexException(
                    String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount,
                        _input));
            }

            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
            {
                // #positions must be >= #postings
                throw new CorruptIndexException(
                    String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}",
                        sumTotalTermFreq, sumDocFreq, _input));
            }

            // Test for the duplicate explicitly instead of catching ArgumentException, so
            // that an argument error raised while building the FieldReader is not
            // mislabelled as a duplicate field.
            if (_fields.ContainsKey(fieldInfo.Name))
            {
                throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}",
                    fieldInfo.Name, _input));
            }

            _fields.Add(fieldInfo.Name,
                new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq,
                    docCount, longsSize));
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            _input.Dispose();
        }
    }

    // Only reached when the terms file was read successfully.
    _indexReader = indexReader;
}