public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context) { _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); var success = false; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); try { _version = ReadHeader(_input); _indexDivisor = indexDivisor; if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) CodecUtil.ChecksumEntireFile(_input); SeekDir(_input, _dirOffset); // Read directory var numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")"); } for (var i = 0; i < numFields; i++) { var field = _input.ReadVInt(); var indexStart = _input.ReadVLong(); var fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(indexStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) { _indexLoaded = true; } } } }
//private readonly string segment; // LUCENENET: Not used public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor, string segmentSuffix, IOContext context) { input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); //this.segment = segment; // LUCENENET: Not used bool success = false; if (Debugging.AssertsEnabled) { Debugging.Assert(indexDivisor == -1 || indexDivisor > 0); } try { version = ReadHeader(input); this.indexDivisor = indexDivisor; if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(input); } SeekDir(input, dirOffset); // Read directory int numFields = input.ReadVInt32(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")"); } for (int i = 0; i < numFields; i++) { int field = input.ReadVInt32(); long indexStart = input.ReadVInt64(); FieldInfo fieldInfo = fieldInfos.FieldInfo(field); FieldIndexData previous = fields.Put(fieldInfo, new FieldIndexData(this, fieldInfo, indexStart)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")"); } } success = true; } finally { if (indexDivisor > 0) { input.Dispose(); input = null; if (success) { indexLoaded = true; } } } }
/// <summary> /// note: -1 is the empty field: "" !!!! </summary> internal static string FieldName(FieldInfos infos, int fieldNumber) { if (fieldNumber == -1) { return(""); } else { return(infos.FieldInfo(fieldNumber).Name); } }
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) { string termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTTermsWriter.TERMS_EXTENSION); this.postingsReader = postingsReader; IndexInput @in = state.Directory.OpenInput(termsFileName, state.Context); bool success = false; try { version = ReadHeader(@in); if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(@in); } this.postingsReader.Init(@in); SeekDir(@in); FieldInfos fieldInfos = state.FieldInfos; int numFields = @in.ReadVInt32(); for (int i = 0; i < numFields; i++) { int fieldNumber = @in.ReadVInt32(); FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); long numTerms = @in.ReadVInt64(); long sumTotalTermFreq = fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY ? -1 : @in.ReadVInt64(); long sumDocFreq = @in.ReadVInt64(); int docCount = @in.ReadVInt32(); int longsSize = @in.ReadVInt32(); TermsReader current = new TermsReader(this, fieldInfo, @in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); TermsReader previous; // LUCENENET NOTE: This simulates a put operation in Java, // getting the prior value first before setting it. fields.TryGetValue(fieldInfo.Name, out previous); fields[fieldInfo.Name] = current; CheckFieldSummary(state.SegmentInfo, @in, current, previous); } success = true; } finally { if (success) { IOUtils.Dispose(@in); } else { IOUtils.DisposeWhileHandlingException(@in); } } }
public override Terms GetTerms(string field) { Terms terms = this.m_input.GetTerms(field); if (terms is null) { return(null); } else { return(new SortingTerms(terms, infos.FieldInfo(field).IndexOptions, docMap)); } }
public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount) { this.termCount = termCount; int fieldNumber = @in.ReadVInt32(); field = fieldInfos.FieldInfo(fieldNumber); if (field.IndexOptions != IndexOptions.DOCS_ONLY) { sumTotalTermFreq = @in.ReadVInt64(); } else { sumTotalTermFreq = -1; } sumDocFreq = @in.ReadVInt64(); docCount = @in.ReadVInt32(); fst = new FST <BytesRef>(@in, outputs); }
internal virtual void Seek(TermInfo ti, Term term) { Count = 0; FieldInfo fi = FieldInfos.FieldInfo(term.Field); this.IndexOptions = (fi != null) ? fi.FieldIndexOptions : FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; CurrentFieldStoresPayloads = (fi != null) && fi.HasPayloads(); if (ti == null) { Df = 0; } else { Df = ti.DocFreq; Doc_Renamed = 0; FreqBasePointer = ti.FreqPointer; ProxBasePointer = ti.ProxPointer; SkipPointer = FreqBasePointer + ti.SkipOffset; FreqStream.Seek(FreqBasePointer); HaveSkipped = false; } }
internal virtual void Seek(TermInfo ti, Term term) { m_count = 0; FieldInfo fi = fieldInfos.FieldInfo(term.Field); this.m_indexOptions = (fi != null) ? fi.IndexOptions : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; m_currentFieldStoresPayloads = (fi != null) && fi.HasPayloads; if (ti == null) { m_df = 0; } else { m_df = ti.DocFreq; doc = 0; freqBasePointer = ti.FreqPointer; proxBasePointer = ti.ProxPointer; skipPointer = freqBasePointer + ti.SkipOffset; m_freqStream.Seek(freqBasePointer); haveSkipped = false; } }
/// <summary> /// Sugar method for <see cref="StartDocument(int)"/> + <see cref="WriteField(FieldInfo, IIndexableField)"/> /// for every stored field in the document. </summary> protected void AddDocument <T1>(IEnumerable <T1> doc, FieldInfos fieldInfos) where T1 : Lucene.Net.Index.IIndexableField { int storedCount = 0; foreach (IIndexableField field in doc) { if (field.FieldType.IsStored) { storedCount++; } } StartDocument(storedCount); foreach (IIndexableField field in doc) { if (field.FieldType.IsStored) { WriteField(fieldInfos.FieldInfo(field.Name), field); } } FinishDocument(); }
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, IComparer<BytesRef> termComp, String segmentSuffix, IOContext context) { _termComp = termComp; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); var success = false; try { _version = ReadHeader(_input); if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) CodecUtil.ChecksumEntireFile(_input); indexInterval = _input.ReadInt(); if (indexInterval < 1) { throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input)); } _indexDivisor = indexDivisor; if (indexDivisor < 0) { _totalIndexInterval = indexInterval; } else { // In case terms index gets loaded, later, on demand _totalIndexInterval = indexInterval*indexDivisor; } Debug.Assert(_totalIndexInterval > 0); SeekDir(_input, _dirOffset); // Read directory int numFields = _input.ReadVInt(); if (numFields < 0) throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input)); for (int i = 0; i < numFields; i++) { int field = _input.ReadVInt(); int numIndexTerms = _input.ReadVInt(); if (numIndexTerms < 0) throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input)); long termsStart = _input.ReadVLong(); long indexStart = _input.ReadVLong(); long packedIndexStart = _input.ReadVLong(); long packedOffsetsStart = _input.ReadVLong(); if (packedIndexStart < indexStart) throw new CorruptIndexException( String.Format( "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}", packedIndexStart, indexStart, numIndexTerms, _input)); FieldInfo fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(_input); } if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) _indexLoaded = true; _termBytesReader = _termBytes.Freeze(true); } } }
private readonly int version; // LUCENENET: marked readonly //static final boolean TEST = false; public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) { string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION); string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION); this.postingsReader = postingsReader; ChecksumIndexInput indexIn = null; IndexInput blockIn = null; bool success = false; try { indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context); blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context); version = ReadHeader(indexIn); ReadHeader(blockIn); if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(blockIn); } this.postingsReader.Init(blockIn); SeekDir(blockIn); FieldInfos fieldInfos = state.FieldInfos; int numFields = blockIn.ReadVInt32(); for (int i = 0; i < numFields; i++) { FieldInfo fieldInfo = fieldInfos.FieldInfo(blockIn.ReadVInt32()); bool hasFreq = fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY; long numTerms = blockIn.ReadVInt64(); long sumTotalTermFreq = hasFreq ? blockIn.ReadVInt64() : -1; long sumDocFreq = blockIn.ReadVInt64(); int docCount = blockIn.ReadVInt32(); int longsSize = blockIn.ReadVInt32(); var index = new FST <long?>(indexIn, PositiveInt32Outputs.Singleton); var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); // LUCENENET NOTE: This simulates a put operation in Java, // getting the prior value first before setting it. fields.TryGetValue(fieldInfo.Name, out TermsReader previous); fields[fieldInfo.Name] = current; CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous); } if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.CheckFooter(indexIn); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(indexIn); #pragma warning restore 612, 618 } success = true; } finally { if (success) { IOUtils.Dispose(indexIn, blockIn); } else { IOUtils.DisposeWhileHandlingException(indexIn, blockIn); } } }
//static final boolean TEST = false; public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) { string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION); string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION); this.postingsReader = postingsReader; ChecksumIndexInput indexIn = null; IndexInput blockIn = null; bool success = false; try { indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context); blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context); version = ReadHeader(indexIn); ReadHeader(blockIn); if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(blockIn); } this.postingsReader.Init(blockIn); SeekDir(blockIn); FieldInfos fieldInfos = state.FieldInfos; int numFields = blockIn.ReadVInt(); for (int i = 0; i < numFields; i++) { FieldInfo fieldInfo = fieldInfos.FieldInfo(blockIn.ReadVInt()); bool hasFreq = fieldInfo.IndexOptions != FieldInfo.IndexOptions.DOCS_ONLY; long numTerms = blockIn.ReadVLong(); long sumTotalTermFreq = hasFreq ? blockIn.ReadVLong() : -1; long sumDocFreq = blockIn.ReadVLong(); int docCount = blockIn.ReadVInt(); int longsSize = blockIn.ReadVInt(); var index = new FST <long>(indexIn, PositiveIntOutputs.Singleton); var current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); var previous = fields[fieldInfo.Name] = current; CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous); } if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { CodecUtil.CheckFooter(indexIn); } else { CodecUtil.CheckEOF(indexIn); } success = true; } finally { if (success) { IOUtils.Close(indexIn, blockIn); } else { IOUtils.CloseWhileHandlingException(indexIn, blockIn); } } }
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor, IComparer <BytesRef> termComp, string segmentSuffix, IOContext context) { this.termComp = termComp; if (Debugging.AssertsEnabled) { Debugging.Assert(indexDivisor == -1 || indexDivisor > 0); } input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); bool success = false; try { version = ReadHeader(input); if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(input); } indexInterval = input.ReadInt32(); if (indexInterval < 1) { throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + input + ")"); } this.indexDivisor = indexDivisor; if (indexDivisor < 0) { totalIndexInterval = indexInterval; } else { // In case terms index gets loaded, later, on demand totalIndexInterval = indexInterval * indexDivisor; } if (Debugging.AssertsEnabled) { Debugging.Assert(totalIndexInterval > 0); } SeekDir(input, dirOffset); // Read directory int numFields = input.ReadVInt32(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")"); } //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields); for (int i = 0; i < numFields; i++) { int field = input.ReadVInt32(); int numIndexTerms = input.ReadVInt32(); if (numIndexTerms < 0) { throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + input + ")"); } long termsStart = input.ReadVInt64(); long indexStart = input.ReadVInt64(); long packedIndexStart = input.ReadVInt64(); long packedOffsetsStart = input.ReadVInt64(); if (packedIndexStart < indexStart) { throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + input + ")"); } FieldInfo fieldInfo = fieldInfos.FieldInfo(field); FieldIndexData previous = fields.Put(fieldInfo, new FieldIndexData(this, /* fieldInfo, // LUCENENET: Not referenced */ numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")"); } } success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(input); } if (indexDivisor > 0) { input.Dispose(); input = null; if (success) { indexLoaded = true; } termBytesReader = termBytes.Freeze(true); } } }
// private string segment; public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context, string segmentSuffix) { this.postingsReader = postingsReader; // this.segment = segment; input = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION), context); bool success = false; try { version = ReadHeader(input); // Have PostingsReader init itself postingsReader.Init(input); // Read per-field details SeekDir(input, dirOffset); int numFields = input.ReadVInt32(); if (numFields < 0) { throw new CorruptIndexException("invalid number of fields: " + numFields + " (resource=" + input + ")"); } for (int i = 0; i < numFields; i++) { int field = input.ReadVInt32(); long numTerms = input.ReadVInt64(); Debug.Assert(numTerms >= 0); long termsStartPointer = input.ReadVInt64(); FieldInfo fieldInfo = fieldInfos.FieldInfo(field); long sumTotalTermFreq = fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY ? -1 : input.ReadVInt64(); long sumDocFreq = input.ReadVInt64(); int docCount = input.ReadVInt32(); int longsSize = version >= BlockTermsWriter.VERSION_META_ARRAY ? input.ReadVInt32() : 0; if (docCount < 0 || docCount > info.DocCount) { // #docs with field must be <= #docs throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + input + ")"); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + input + ")"); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + input + ")"); } FieldReader previous = fields.Put(fieldInfo.Name, new FieldReader(this, fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); if (previous != null) { throw new CorruptIndexException("duplicate fields: " + fieldInfo.Name + " (resource=" + input + ")"); } } success = true; } finally { if (!success) { input.Dispose(); } } this.indexReader = indexReader; }
public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount) { this.termCount = termCount; int fieldNumber = @in.ReadVInt(); field = fieldInfos.FieldInfo(fieldNumber); if (field.FieldIndexOptions != IndexOptions.DOCS_ONLY) { sumTotalTermFreq = @in.ReadVLong(); } else { sumTotalTermFreq = -1; } sumDocFreq = @in.ReadVLong(); docCount = @in.ReadVInt(); fst = new FST<BytesRef>(@in, outputs); }
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context, String segmentSuffix) { _postingsReader = postingsReader; _input = dir.OpenInput( IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION), context); var success = false; try { _version = ReadHeader(_input); // Have PostingsReader init itself postingsReader.Init(_input); // Read per-field details SeekDir(_input, _dirOffset); int numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}", numFields, _input)); } for (var i = 0; i < numFields; i++) { var field = _input.ReadVInt(); var numTerms = _input.ReadVLong(); Debug.Assert(numTerms >= 0); var termsStartPointer = _input.ReadVLong(); var fieldInfo = fieldInfos.FieldInfo(field); var sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : _input.ReadVLong(); var sumDocFreq = _input.ReadVLong(); var docCount = _input.ReadVInt(); var longsSize = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0; if (docCount < 0 || docCount > info.DocCount) { // #docs with field must be <= #docs throw new CorruptIndexException( String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount, _input)); } if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException( String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount, _input)); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException( String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}", sumTotalTermFreq, sumDocFreq, _input)); } try { _fields.Add(fieldInfo.Name, new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (!success) { _input.Dispose(); } } _indexReader = indexReader; }
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context) { _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); var success = false; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); try { _version = ReadHeader(_input); _indexDivisor = indexDivisor; if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(_input); } SeekDir(_input, _dirOffset); // Read directory var numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")"); } for (var i = 0; i < numFields; i++) { var field = _input.ReadVInt(); var indexStart = _input.ReadVLong(); var fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(indexStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) { _indexLoaded = true; } } } }
public void Read(IndexInput input, FieldInfos fieldInfos) { this.Term = null; // invalidate cache NewSuffixStart = input.ReadVInt(); int length = input.ReadVInt(); int totalLength = NewSuffixStart + length; Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input); if (Bytes.Bytes.Length < totalLength) { Bytes.Grow(totalLength); } Bytes.Length = totalLength; input.ReadBytes(Bytes.Bytes, NewSuffixStart, length); int fieldNumber = input.ReadVInt(); if (fieldNumber != CurrentFieldNumber) { CurrentFieldNumber = fieldNumber; // NOTE: too much sneakiness here, seriously this is a negative vint?! if (CurrentFieldNumber == -1) { Field = ""; } else { Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString()); Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name); } } else { Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name), "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " + fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name); } }
public override void VisitDocument(int docID, StoredFieldVisitor visitor) { fieldsStream.Seek(indexReader.GetStartPointer(docID)); int docBase = fieldsStream.ReadVInt32(); int chunkDocs = fieldsStream.ReadVInt32(); if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs) { throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")"); } int numStoredFields, offset, length, totalLength; if (chunkDocs == 1) { numStoredFields = fieldsStream.ReadVInt32(); offset = 0; length = fieldsStream.ReadVInt32(); totalLength = length; } else { int bitsPerStoredFields = fieldsStream.ReadVInt32(); if (bitsPerStoredFields == 0) { numStoredFields = fieldsStream.ReadVInt32(); } else if (bitsPerStoredFields > 31) { throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")"); } else { long filePointer = fieldsStream.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream PackedInt32s.Reader reader = PackedInt32s.GetDirectReaderNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields); numStoredFields = (int)(reader.Get(docID - docBase)); fieldsStream.Seek(filePointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields)); } int bitsPerLength = fieldsStream.ReadVInt32(); if (bitsPerLength == 0) { length = fieldsStream.ReadVInt32(); offset = (docID - docBase) * length; totalLength = chunkDocs * length; } else if (bitsPerStoredFields > 31) { throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")"); } else { PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1); int off = 0; for (int i = 0; i < docID - docBase; ++i) { off += (int)it.Next(); } offset = off; length = (int)it.Next(); off += length; for (int i = docID - docBase + 1; i < chunkDocs; ++i) { off += (int)it.Next(); } totalLength = off; } } if ((length == 0) != (numStoredFields == 0)) { throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")"); } if (numStoredFields == 0) { // nothing to do return; } DataInput documentInput; if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize) { if (Debugging.AssertsEnabled) { Debugging.Assert(chunkSize > 0); Debugging.Assert(offset < chunkSize); } decompressor.Decompress(fieldsStream, chunkSize, offset, Math.Min(length, chunkSize - offset), bytes); documentInput = new DataInputAnonymousClass(this, length); } else { BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef(); decompressor.Decompress(fieldsStream, totalLength, offset, length, bytes); if (Debugging.AssertsEnabled) { Debugging.Assert(bytes.Length == length); } documentInput = new ByteArrayDataInput(bytes.Bytes, bytes.Offset, bytes.Length); } for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++) { long infoAndBits = documentInput.ReadVInt64(); int fieldNumber = (int)infoAndBits.TripleShift(CompressingStoredFieldsWriter.TYPE_BITS); FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK); if (Debugging.AssertsEnabled) { Debugging.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits={0:x}", bits); } switch (visitor.NeedsField(fieldInfo)) { case StoredFieldVisitor.Status.YES: ReadField(documentInput, visitor, fieldInfo, bits); break; case StoredFieldVisitor.Status.NO: SkipField(documentInput, bits); break; case StoredFieldVisitor.Status.STOP: return; } } }
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, IComparer <BytesRef> termComp, String segmentSuffix, IOContext context) { _termComp = termComp; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); var success = false; try { _version = ReadHeader(_input); if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(_input); } indexInterval = _input.ReadInt(); if (indexInterval < 1) { throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input)); } _indexDivisor = indexDivisor; if (indexDivisor < 0) { _totalIndexInterval = indexInterval; } else { // In case terms index gets loaded, later, on demand _totalIndexInterval = indexInterval * indexDivisor; } Debug.Assert(_totalIndexInterval > 0); SeekDir(_input, _dirOffset); // Read directory int numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input)); } for (int i = 0; i < numFields; i++) { int field = _input.ReadVInt(); int numIndexTerms = _input.ReadVInt(); if (numIndexTerms < 0) { throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input)); } long termsStart = _input.ReadVLong(); long indexStart = _input.ReadVLong(); long packedIndexStart = _input.ReadVLong(); long packedOffsetsStart = _input.ReadVLong(); if (packedIndexStart < indexStart) { throw new CorruptIndexException( String.Format( "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}", packedIndexStart, indexStart, numIndexTerms, _input)); } FieldInfo fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(_input); } if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) { _indexLoaded = true; } _termBytesReader = _termBytes.Freeze(true); } } }
/// <summary> /// Sole constructor. </summary> public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext ioContext, string segmentSuffix, int indexDivisor) { if (!InstanceFieldsInitialized) { InitializeInstanceFields(); InstanceFieldsInitialized = true; } this.PostingsReader = postingsReader; this.Segment = info.Name; @in = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION), ioContext); bool success = false; IndexInput indexIn = null; try { Version = ReadHeader(@in); if (indexDivisor != -1) { indexIn = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), ioContext); int indexVersion = ReadIndexHeader(indexIn); if (indexVersion != Version) { throw new CorruptIndexException("mixmatched version files: " + @in + "=" + Version + "," + indexIn + "=" + indexVersion); } } // verify if (indexIn != null && Version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { CodecUtil.ChecksumEntireFile(indexIn); } // Have PostingsReader init itself postingsReader.Init(@in); // Read per-field details SeekDir(@in, DirOffset); if (indexDivisor != -1) { SeekDir(indexIn, IndexDirOffset); } int numFields = @in.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + @in + ")"); } for (int i = 0; i < numFields; i++) { int field = @in.ReadVInt(); long numTerms = @in.ReadVLong(); Debug.Assert(numTerms >= 0); int numBytes = @in.ReadVInt(); BytesRef rootCode = new BytesRef(new byte[numBytes]); @in.ReadBytes(rootCode.Bytes, 0, numBytes); rootCode.Length = numBytes; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); Debug.Assert(fieldInfo != null, "field=" + field); long sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong(); long sumDocFreq = @in.ReadVLong(); int docCount = @in.ReadVInt(); int longsSize = Version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? @in.ReadVInt() : 0; if (docCount < 0 || docCount > info.DocCount) // #docs with field must be <= #docs { throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + @in + ")"); } if (sumDocFreq < docCount) // #postings must be >= #docs with field { throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + @in + ")"); } if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) // #positions must be >= #postings { throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + @in + ")"); } long indexStartFP = indexDivisor != -1 ? indexIn.ReadVLong() : 0; if (Fields.ContainsKey(fieldInfo.Name)) { throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + @in + ")"); } else { Fields[fieldInfo.Name] = new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn); } } if (indexDivisor != -1) { indexIn.Dispose(); } success = true; } finally { if (!success) { // this.close() will close in: IOUtils.CloseWhileHandlingException(indexIn, this); } } }
public override void VisitDocument(int n, StoredFieldVisitor visitor) { _input.Seek(_offsets[n]); ReadLine(); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NUM)); var numFields = ParseInt32At(SimpleTextStoredFieldsWriter.NUM.Length); for (var i = 0; i < numFields; i++) { ReadLine(); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.FIELD)); int fieldNumber = ParseInt32At(SimpleTextStoredFieldsWriter.FIELD.Length); FieldInfo fieldInfo = _fieldInfos.FieldInfo(fieldNumber); ReadLine(); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NAME)); ReadLine(); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.TYPE)); BytesRef type; if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_STRING, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length)) { type = SimpleTextStoredFieldsWriter.TYPE_STRING; } else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_BINARY, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length)) { type = SimpleTextStoredFieldsWriter.TYPE_BINARY; } else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_INT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length)) { type = SimpleTextStoredFieldsWriter.TYPE_INT; } else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_LONG, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length)) { type = SimpleTextStoredFieldsWriter.TYPE_LONG; } else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_FLOAT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length)) { type = SimpleTextStoredFieldsWriter.TYPE_FLOAT; } else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_DOUBLE, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length)) { type = SimpleTextStoredFieldsWriter.TYPE_DOUBLE; } else { throw new Exception("unknown field type"); } switch (visitor.NeedsField(fieldInfo)) { case StoredFieldVisitor.Status.YES: ReadField(type, fieldInfo, visitor); break; case StoredFieldVisitor.Status.NO: ReadLine(); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE)); break; case StoredFieldVisitor.Status.STOP: return; } } }
public override void VisitDocument(int docID, StoredFieldVisitor visitor) { FieldsStream.Seek(IndexReader.GetStartPointer(docID)); int docBase = FieldsStream.ReadVInt(); int chunkDocs = FieldsStream.ReadVInt(); if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > NumDocs) { throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + NumDocs + " (resource=" + FieldsStream + ")"); } int numStoredFields, offset, length, totalLength; if (chunkDocs == 1) { numStoredFields = FieldsStream.ReadVInt(); offset = 0; length = FieldsStream.ReadVInt(); totalLength = length; } else { int bitsPerStoredFields = FieldsStream.ReadVInt(); if (bitsPerStoredFields == 0) { numStoredFields = FieldsStream.ReadVInt(); } else if (bitsPerStoredFields > 31) { throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + FieldsStream + ")"); } else { long filePointer = FieldsStream.FilePointer; PackedInts.Reader reader = PackedInts.GetDirectReaderNoHeader(FieldsStream, PackedInts.Format.PACKED, PackedIntsVersion, chunkDocs, bitsPerStoredFields); numStoredFields = (int)(reader.Get(docID - docBase)); FieldsStream.Seek(filePointer + PackedInts.Format.PACKED.ByteCount(PackedIntsVersion, chunkDocs, bitsPerStoredFields)); } int bitsPerLength = FieldsStream.ReadVInt(); if (bitsPerLength == 0) { length = FieldsStream.ReadVInt(); offset = (docID - docBase) * length; totalLength = chunkDocs * length; } else if (bitsPerStoredFields > 31) { throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + FieldsStream + ")"); } else { PackedInts.ReaderIterator it = PackedInts.GetReaderIteratorNoHeader(FieldsStream, PackedInts.Format.PACKED, PackedIntsVersion, chunkDocs, bitsPerLength, 1); int off = 0; for (int i = 0; i < docID - docBase; ++i) { off += (int)it.Next(); } offset = off; length = (int)it.Next(); off += length; for (int i = docID - docBase + 1; i < chunkDocs; ++i) { off += (int)it.Next(); } totalLength = off; } } if ((length == 0) != (numStoredFields == 0)) { throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + FieldsStream + ")"); } if (numStoredFields == 0) { // nothing to do return; } DataInput documentInput; if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * ChunkSize_Renamed) { Debug.Assert(ChunkSize_Renamed > 0); Debug.Assert(offset < ChunkSize_Renamed); Decompressor.Decompress(FieldsStream, ChunkSize_Renamed, offset, Math.Min(length, ChunkSize_Renamed - offset), Bytes); documentInput = new DataInputAnonymousInnerClassHelper(this, offset, length); } else { BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.Bytes : new BytesRef(); Decompressor.Decompress(FieldsStream, totalLength, offset, length, bytes); Debug.Assert(bytes.Length == length); documentInput = new ByteArrayDataInput((byte[])(Array)bytes.Bytes, bytes.Offset, bytes.Length); } for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++) { long infoAndBits = documentInput.ReadVLong(); int fieldNumber = (int)((long)((ulong)infoAndBits >> CompressingStoredFieldsWriter.TYPE_BITS)); FieldInfo fieldInfo = FieldInfos.FieldInfo(fieldNumber); int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK); Debug.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits=" + bits.ToString("x")); switch (visitor.NeedsField(fieldInfo)) { case StoredFieldVisitor.Status.YES: ReadField(documentInput, visitor, fieldInfo, bits); break; case StoredFieldVisitor.Status.NO: SkipField(documentInput, bits); break; case StoredFieldVisitor.Status.STOP: return; } } }