internal TermsReader(FSTOrdTermsReader outerInstance, FieldInfo fieldInfo, IndexInput blockIn,
    long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<long> index)
{
    this.outerInstance = outerInstance;
    this.fieldInfo = fieldInfo;
    this.numTerms = numTerms;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
    this.docCount = docCount;
    this.longsSize = longsSize;
    this.index = index;

    Debug.Assert((numTerms & (~0xffffffffL)) == 0);

    int numBlocks = (int)(numTerms + INTERVAL - 1) / INTERVAL;
    this.numSkipInfo = longsSize + 3;
    this.skipInfo = new long[numBlocks * numSkipInfo];
    this.statsBlock = new sbyte[(int)blockIn.ReadVLong()];
    this.metaLongsBlock = new sbyte[(int)blockIn.ReadVLong()];
    this.metaBytesBlock = new sbyte[(int)blockIn.ReadVLong()];

    // Skip info is delta-encoded between consecutive blocks; rebuild the
    // absolute values with a running prefix sum over the VLong deltas.
    int last = 0, next = 0;
    for (int i = 1; i < numBlocks; i++)
    {
        next = numSkipInfo * i;
        for (int j = 0; j < numSkipInfo; j++)
        {
            skipInfo[next + j] = skipInfo[last + j] + blockIn.ReadVLong();
        }
        last = next;
    }

    blockIn.ReadBytes(statsBlock, 0, statsBlock.Length);
    blockIn.ReadBytes(metaLongsBlock, 0, metaLongsBlock.Length);
    blockIn.ReadBytes(metaBytesBlock, 0, metaBytesBlock.Length);
}
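Every snippet on this page funnels through IndexInput.ReadVLong, which decodes Lucene's variable-length long: seven payload bits per byte, least-significant group first, with the high bit set on every byte except the last. A minimal stand-alone sketch of that decoder follows; VLongSketch and its Stream-based helper are hypothetical, written here only to show the wire format, and are not part of the Lucene.NET API.

// Sketch of Lucene's VLong decoding, assuming the standard 7-bits-per-byte
// format with the high bit as a continuation flag. Hypothetical helper.
using System;
using System.IO;

internal static class VLongSketch
{
    public static long ReadVLongSketch(Stream input)
    {
        long value = 0;
        int shift = 0;
        while (true)
        {
            int b = input.ReadByte();
            if (b == -1)
            {
                throw new EndOfStreamException("Truncated VLong");
            }
            value |= (long)(b & 0x7F) << shift;  // low 7 bits are payload
            if ((b & 0x80) == 0)                 // high bit clear: last byte
            {
                return value;
            }
            shift += 7;
        }
    }
}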
/// <summary>
/// Increments the enumeration to the next element. True if one exists.
/// </summary>
public bool Next()
{
    PrevBuffer.Set(TermBuffer);
    //System.out.println("  ste setPrev=" + prev() + " this=" + this);

    if (Position++ >= Size - 1)
    {
        TermBuffer.Reset();
        //System.out.println("    EOF");
        return false;
    }

    TermBuffer.Read(Input, FieldInfos);
    NewSuffixStart = TermBuffer.NewSuffixStart;
    TermInfo_Renamed.DocFreq = Input.ReadVInt();        // read doc freq
    TermInfo_Renamed.FreqPointer += Input.ReadVLong();  // read freq pointer
    TermInfo_Renamed.ProxPointer += Input.ReadVLong();  // read prox pointer

    if (TermInfo_Renamed.DocFreq >= SkipInterval)
    {
        TermInfo_Renamed.SkipOffset = Input.ReadVInt();
    }

    if (IsIndex)
    {
        IndexPointer += Input.ReadVLong();  // read index pointer
    }

    //System.out.println("  ste ret term=" + term());
    return true;
}
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    IndexInput @in = state.Directory.OpenInput(termsFileName, state.Context);

    bool success = false;
    try
    {
        version = ReadHeader(@in);
        if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(@in);
        }
        this.postingsReader.Init(@in);
        SeekDir(@in);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = @in.ReadVInt();
        for (int i = 0; i < numFields; i++)
        {
            int fieldNumber = @in.ReadVInt();
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);
            long numTerms = @in.ReadVLong();
            long sumTotalTermFreq = fieldInfo.FieldIndexOptions == IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
            long sumDocFreq = @in.ReadVLong();
            int docCount = @in.ReadVInt();
            int longsSize = @in.ReadVInt();
            TermsReader current = new TermsReader(this, fieldInfo, @in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
            TermsReader previous;
            // LUCENENET NOTE: This simulates a put operation in Java,
            // getting the prior value first before setting it.
            fields.TryGetValue(fieldInfo.Name, out previous);
            fields[fieldInfo.Name] = current;
            CheckFieldSummary(state.SegmentInfo, @in, current, previous);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(@in);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(@in);
        }
    }
}
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
    String segmentSuffix, IOContext context)
{
    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        new IOContext(context, true));

    var success = false;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    try
    {
        _version = ReadHeader(_input);
        _indexDivisor = indexDivisor;

        if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(_input);
        }

        SeekDir(_input, _dirOffset);

        // Read directory
        var numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
        }

        for (var i = 0; i < numFields; i++)
        {
            var field = _input.ReadVInt();
            var indexStart = _input.ReadVLong();
            var fieldInfo = fieldInfos.FieldInfo(field);

            try
            {
                _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input));
            }
        }
        success = true;
    }
    finally
    {
        if (indexDivisor > 0)
        {
            _input.Dispose();
            _input = null;
            if (success)
            {
                _indexLoaded = true;
            }
        }
    }
}
internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    BinaryEntry entry = new BinaryEntry();
    entry.Format = meta.ReadVInt();
    entry.MissingOffset = meta.ReadLong();
    entry.MinLength = meta.ReadVInt();
    entry.MaxLength = meta.ReadVInt();
    entry.Count = meta.ReadVLong();
    entry.Offset = meta.ReadLong();
    switch (entry.Format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            break;

        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            entry.AddressInterval = meta.ReadVInt();
            entry.AddressesOffset = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.BlockSize = meta.ReadVInt();
            break;

        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            entry.AddressesOffset = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.BlockSize = meta.ReadVInt();
            break;

        default:
            // A bare Exception loses the corruption context; report it as index corruption.
            throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
    }
    return entry;
}
public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount)
{
    this.termCount = termCount;
    int fieldNumber = @in.ReadVInt();
    field = fieldInfos.FieldInfo(fieldNumber);
    if (field.FieldIndexOptions != IndexOptions.DOCS_ONLY)
    {
        sumTotalTermFreq = @in.ReadVLong();
    }
    else
    {
        sumTotalTermFreq = -1;
    }
    sumDocFreq = @in.ReadVLong();
    docCount = @in.ReadVInt();
    fst = new FST<BytesRef>(@in, outputs);
}
/// <summary>Increments the enumeration to the next element. True if one exists.</summary>
public override bool Next()
{
    if (position++ >= size - 1)
    {
        prevBuffer.Set(termBuffer);
        termBuffer.Reset();
        return false;
    }

    prevBuffer.Set(termBuffer);
    termBuffer.Read(input, fieldInfos);
    termInfo.docFreq = input.ReadVInt();        // read doc freq
    termInfo.freqPointer += input.ReadVLong();  // read freq pointer
    termInfo.proxPointer += input.ReadVLong();  // read prox pointer

    if (format == -1)
    {
        // just read skipOffset in order to increment file pointer;
        // value is never used since skipTo is switched off
        if (!isIndex)
        {
            if (termInfo.docFreq > formatM1SkipInterval)
            {
                termInfo.skipOffset = input.ReadVInt();
            }
        }
    }
    else
    {
        if (termInfo.docFreq >= skipInterval)
        {
            termInfo.skipOffset = input.ReadVInt();
        }
    }

    if (isIndex)
    {
        indexPointer += input.ReadVLong();  // read index pointer
    }

    return true;
}
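Both Next() implementations accumulate freqPointer and proxPointer with +=: the pointers grow monotonically through the postings file, so the writer stores only the gap between consecutive values, and small gaps serialize to one or two VLong bytes. A hedged writer-side sketch of that convention follows; GapEncodingSketch is a hypothetical helper for illustration, not the actual TermInfosWriter logic.

// Sketch: gap-encode a monotonically increasing sequence of file pointers.
// The reader reverses this with "pointer += ReadVLong()", exactly as above.
// Hypothetical helper, not part of Lucene.NET.
using System.Collections.Generic;

internal static class GapEncodingSketch
{
    public static IEnumerable<long> ToGaps(IReadOnlyList<long> pointers)
    {
        long previous = 0;
        foreach (long pointer in pointers)
        {
            yield return pointer - previous;  // small delta instead of absolute position
            previous = pointer;
        }
    }
}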
internal static NumericEntry ReadNumericEntry(IndexInput meta)
{
    NumericEntry entry = new NumericEntry();
    entry.Format = meta.ReadVInt();
    entry.MissingOffset = meta.ReadLong();
    entry.PackedIntsVersion = meta.ReadVInt();
    entry.Offset = meta.ReadLong();
    entry.Count = meta.ReadVLong();
    entry.BlockSize = meta.ReadVInt();
    switch (entry.Format)
    {
        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            entry.MinValue = meta.ReadLong();
            entry.Gcd = meta.ReadLong();
            break;

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            if (entry.Count > int.MaxValue)
            {
                throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
            }
            int uniqueValues = meta.ReadVInt();
            if (uniqueValues > 256)
            {
                throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
            }
            entry.Table = new long[uniqueValues];
            for (int i = 0; i < uniqueValues; ++i)
            {
                entry.Table[i] = meta.ReadLong();
            }
            break;

        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            break;

        default:
            throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
    }
    return entry;
}
/// <summary>
/// Sole constructor.
/// </summary>
public MonotonicBlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
{
    this.ValueCount = valueCount;
    BlockShift = PackedInts.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
    BlockMask = blockSize - 1;
    int numBlocks = PackedInts.NumBlocks(valueCount, blockSize);
    MinValues = new long[numBlocks];
    Averages = new float[numBlocks];
    SubReaders = new PackedInts.Reader[numBlocks];
    for (int i = 0; i < numBlocks; ++i)
    {
        MinValues[i] = @in.ReadVLong();
        Averages[i] = Number.IntBitsToFloat(@in.ReadInt());
        int bitsPerValue = @in.ReadVInt();
        if (bitsPerValue > 64)
        {
            // Report as index corruption rather than a bare Exception.
            throw new CorruptIndexException("Corrupted");
        }
        if (bitsPerValue == 0)
        {
            SubReaders[i] = new PackedInts.NullReader(blockSize);
        }
        else
        {
            int size = (int)Math.Min(blockSize, valueCount - (long)i * blockSize);
            if (direct)
            {
                long pointer = @in.FilePointer;
                SubReaders[i] = PackedInts.GetDirectReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                @in.Seek(pointer + PackedInts.Format.PACKED.ByteCount(packedIntsVersion, size, bitsPerValue));
            }
            else
            {
                SubReaders[i] = PackedInts.GetReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
            }
        }
    }
}
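Per block, this reader keeps a minimum, an average slope, and a packed array of residuals: the i-th value inside a block is reconstructed as min + (long)(avg * i) + residual. A hedged sketch of that lookup against the fields set in the constructor above; GetSketch is illustrative of the arithmetic the format implies, not the reader's actual accessor.

// Sketch of the monotonic lookup implied by MinValues/Averages/SubReaders.
// Assumes BlockShift/BlockMask as initialized above; written from the stored
// fields as an illustration, not copied from MonotonicBlockPackedReader.
public long GetSketch(long index)
{
    int block = (int)((ulong)index >> BlockShift); // which block the value lives in
    int idx = (int)(index & BlockMask);            // offset inside the block
    return MinValues[block]
        + (long)(Averages[block] * (long)idx)      // linear prediction along the block
        + SubReaders[block].Get(idx);              // packed residual correction
}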
private BinaryDocValues LoadBytesVarStraight(FieldInfo field)
{
    string dataName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
    string indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx");
    IndexInput data = null;
    IndexInput index = null;
    bool success = false;
    try
    {
        data = Dir.OpenInput(dataName, State.Context);
        CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
        index = Dir.OpenInput(indexName, State.Context);
        CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

        long totalBytes = index.ReadVLong();
        PagedBytes bytes = new PagedBytes(16);
        bytes.Copy(data, totalBytes);
        PagedBytes.Reader bytesReader = bytes.Freeze(true);
        PackedInts.Reader reader = PackedInts.GetReader(index);
        CodecUtil.CheckEOF(data);
        CodecUtil.CheckEOF(index);
        success = true;
        RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
        return new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, reader);
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(data, index);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(data, index);
        }
    }
}
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
    string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

    this.postingsReader = postingsReader;
    ChecksumIndexInput indexIn = null;
    IndexInput blockIn = null;
    bool success = false;
    try
    {
        indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
        blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
        version = ReadHeader(indexIn);
        ReadHeader(blockIn);
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(blockIn);
        }

        this.postingsReader.Init(blockIn);
        SeekDir(blockIn);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = blockIn.ReadVInt();
        for (int i = 0; i < numFields; i++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(blockIn.ReadVInt());
            bool hasFreq = fieldInfo.IndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
            long numTerms = blockIn.ReadVLong();
            long sumTotalTermFreq = hasFreq ? blockIn.ReadVLong() : -1;
            long sumDocFreq = blockIn.ReadVLong();
            int docCount = blockIn.ReadVInt();
            int longsSize = blockIn.ReadVInt();
            var index = new FST<long>(indexIn, PositiveIntOutputs.Singleton);

            var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);

            // Fetch the prior value before overwriting, mirroring Java's map.put();
            // a chained assignment would make previous == current and defeat the check.
            TermsReader previous;
            fields.TryGetValue(fieldInfo.Name, out previous);
            fields[fieldInfo.Name] = current;
            CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
        }
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(indexIn);
        }
        else
        {
            CodecUtil.CheckEOF(indexIn);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(indexIn, blockIn);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(indexIn, blockIn);
        }
    }
}
/// <summary>
/// Sole constructor.
/// </summary>
public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext ioContext, string segmentSuffix, int indexDivisor)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }

    this.PostingsReader = postingsReader;
    this.Segment = info.Name;
    @in = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION), ioContext);

    bool success = false;
    IndexInput indexIn = null;

    try
    {
        Version = ReadHeader(@in);
        if (indexDivisor != -1)
        {
            indexIn = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), ioContext);
            int indexVersion = ReadIndexHeader(indexIn);
            if (indexVersion != Version)
            {
                throw new CorruptIndexException("mixmatched version files: " + @in + "=" + Version + "," + indexIn + "=" + indexVersion);
            }
        }

        // verify
        if (indexIn != null && Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(indexIn);
        }

        // Have PostingsReader init itself
        postingsReader.Init(@in);

        // Read per-field details
        SeekDir(@in, DirOffset);
        if (indexDivisor != -1)
        {
            SeekDir(indexIn, IndexDirOffset);
        }

        int numFields = @in.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + @in + ")");
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = @in.ReadVInt();
            long numTerms = @in.ReadVLong();
            Debug.Assert(numTerms >= 0);
            int numBytes = @in.ReadVInt();
            BytesRef rootCode = new BytesRef(new byte[numBytes]);
            @in.ReadBytes(rootCode.Bytes, 0, numBytes);
            rootCode.Length = numBytes;
            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
            Debug.Assert(fieldInfo != null, "field=" + field);
            long sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
            long sumDocFreq = @in.ReadVLong();
            int docCount = @in.ReadVInt();
            int longsSize = Version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? @in.ReadVInt() : 0;
            if (docCount < 0 || docCount > info.DocCount) // #docs with field must be <= #docs
            {
                throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + @in + ")");
            }
            if (sumDocFreq < docCount) // #postings must be >= #docs with field
            {
                throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + @in + ")");
            }
            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) // #positions must be >= #postings
            {
                throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + @in + ")");
            }
            long indexStartFP = indexDivisor != -1 ? indexIn.ReadVLong() : 0;

            if (Fields.ContainsKey(fieldInfo.Name))
            {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + @in + ")");
            }
            else
            {
                Fields[fieldInfo.Name] = new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn);
            }
        }

        if (indexDivisor != -1)
        {
            indexIn.Dispose();
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            // this.close() will close in:
            IOUtils.CloseWhileHandlingException(indexIn, this);
        }
    }
}
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info,
    PostingsReaderBase postingsReader, IOContext context, String segmentSuffix)
{
    _postingsReader = postingsReader;

    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
        context);

    var success = false;
    try
    {
        _version = ReadHeader(_input);

        // Have PostingsReader init itself
        postingsReader.Init(_input);

        // Read per-field details
        SeekDir(_input, _dirOffset);

        int numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}", numFields, _input));
        }

        for (var i = 0; i < numFields; i++)
        {
            var field = _input.ReadVInt();
            var numTerms = _input.ReadVLong();

            Debug.Assert(numTerms >= 0);

            var termsStartPointer = _input.ReadVLong();
            var fieldInfo = fieldInfos.FieldInfo(field);
            var sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : _input.ReadVLong();
            var sumDocFreq = _input.ReadVLong();
            var docCount = _input.ReadVInt();
            var longsSize = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0;

            if (docCount < 0 || docCount > info.DocCount)
            {
                // #docs with field must be <= #docs
                throw new CorruptIndexException(
                    String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount, _input));
            }

            if (sumDocFreq < docCount)
            {
                // #postings must be >= #docs with field
                throw new CorruptIndexException(
                    String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount, _input));
            }

            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
            {
                // #positions must be >= #postings
                throw new CorruptIndexException(
                    String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}", sumTotalTermFreq, sumDocFreq, _input));
            }

            try
            {
                _fields.Add(fieldInfo.Name,
                    new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}", fieldInfo.Name, _input));
            }
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            _input.Dispose();
        }
    }

    _indexReader = indexReader;
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
        }

        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry
            {
                Offset = meta.ReadLong(),
                Format = (sbyte)meta.ReadByte()
            };
            switch (entry.Format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
            }
            if (entry.Format != UNCOMPRESSED)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
            }
            Numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Offset = meta.ReadLong();
            entry.NumBytes = meta.ReadLong();
            entry.MinLength = meta.ReadVInt();
            entry.MaxLength = meta.ReadVInt();
            if (entry.MinLength != entry.MaxLength)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize = meta.ReadVInt();
            }
            Binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            FSTEntry entry = new FSTEntry();
            entry.Offset = meta.ReadLong();
            entry.NumOrds = meta.ReadVLong();
            Fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
internal readonly PackedInts.Reader[] StartPointersDeltas; // delta from the avg

// It is the responsibility of the caller to close fieldsIndexIn after this constructor
// has been called
internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
{
    MaxDoc = si.DocCount;
    int[] docBases = new int[16];
    long[] startPointers = new long[16];
    int[] avgChunkDocs = new int[16];
    long[] avgChunkSizes = new long[16];
    PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
    PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

    int packedIntsVersion = fieldsIndexIn.ReadVInt();

    int blockCount = 0;
    for (;;)
    {
        int numChunks = fieldsIndexIn.ReadVInt();
        if (numChunks == 0)
        {
            break;
        }

        if (blockCount == docBases.Length)
        {
            int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
            docBases = Arrays.CopyOf(docBases, newSize);
            startPointers = Arrays.CopyOf(startPointers, newSize);
            avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize);
            avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize);
            docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize);
            startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
        }

        // doc bases
        docBases[blockCount] = fieldsIndexIn.ReadVInt();
        avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt();
        int bitsPerDocBase = fieldsIndexIn.ReadVInt();
        if (bitsPerDocBase > 32)
        {
            throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
        }
        docBasesDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

        // start pointers
        startPointers[blockCount] = fieldsIndexIn.ReadVLong();
        avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong();
        int bitsPerStartPointer = fieldsIndexIn.ReadVInt();
        if (bitsPerStartPointer > 64)
        {
            throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
        }
        startPointersDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

        ++blockCount;
    }

    this.DocBases = Arrays.CopyOf(docBases, blockCount);
    this.StartPointers = Arrays.CopyOf(startPointers, blockCount);
    this.AvgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount);
    this.AvgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount);
    this.DocBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount);
    this.StartPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
}
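Each block here stores a first value plus an average per-chunk increment, and the packed delta arrays hold only the deviation from that linear model (zigzag-encoded in Lucene's format, so negative deviations stay small). A hedged sketch of how a chunk's doc base would be reconstructed from the fields above; RelativeDocBaseSketch and its exact decode step are illustrative, not the reader's actual private method.

// Sketch: recover the doc base of chunk `relativeChunk` inside block `block`.
// Assumes the deltas were zigzag-encoded (low bit carries the sign), as in
// Lucene's stored-fields index writer; names here are illustrative only.
private int RelativeDocBaseSketch(int block, int relativeChunk)
{
    int expected = AvgChunkDocs[block] * relativeChunk;         // linear prediction
    long packed = DocBasesDeltas[block].Get(relativeChunk);     // stored deviation
    long delta = ((long)((ulong)packed >> 1)) ^ -(packed & 1);  // zigzag decode
    return DocBases[block] + expected + (int)delta;
}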
/// <summary>
/// Reads the snapshots information from the given <seealso cref="Directory"/>. This
/// method can be used if the snapshots information is needed, however you
/// cannot instantiate the deletion policy (because e.g., some other process
/// keeps a lock on the snapshots directory).
/// </summary>
private void LoadPriorSnapshots()
{
    lock (this)
    {
        long genLoaded = -1;
        IOException ioe = null;
        IList<string> snapshotFiles = new List<string>();
        foreach (string file in Dir.ListAll())
        {
            if (file.StartsWith(SNAPSHOTS_PREFIX))
            {
                long gen = Convert.ToInt64(file.Substring(SNAPSHOTS_PREFIX.Length));
                if (genLoaded == -1 || gen > genLoaded)
                {
                    snapshotFiles.Add(file);
                    IDictionary<long, int> m = new Dictionary<long, int>();
                    IndexInput @in = Dir.OpenInput(file, IOContext.DEFAULT);
                    try
                    {
                        CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START);
                        int count = @in.ReadVInt();
                        for (int i = 0; i < count; i++)
                        {
                            long commitGen = @in.ReadVLong();
                            int refCount = @in.ReadVInt();
                            m[commitGen] = refCount;
                        }
                    }
                    catch (IOException ioe2)
                    {
                        // Save first exception & throw in the end
                        if (ioe == null)
                        {
                            ioe = ioe2;
                        }
                    }
                    finally
                    {
                        @in.Dispose();
                    }

                    genLoaded = gen;
                    RefCounts.Clear();
                    RefCounts.PutAll(m);
                }
            }
        }

        if (genLoaded == -1)
        {
            // Nothing was loaded...
            if (ioe != null)
            {
                // ... not for lack of trying:
                throw ioe;
            }
        }
        else
        {
            if (snapshotFiles.Count > 1)
            {
                // Remove any broken / old snapshot files:
                string curFileName = SNAPSHOTS_PREFIX + genLoaded;
                foreach (string file in snapshotFiles)
                {
                    if (!curFileName.Equals(file))
                    {
                        Dir.DeleteFile(file);
                    }
                }
            }
            NextWriteGen = 1 + genLoaded;
        }
    }
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry
            {
                offset = meta.ReadLong(),
                missingOffset = meta.ReadLong()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.format = meta.ReadByte();
            switch (entry.format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
            }
            if (entry.format != UNCOMPRESSED)
            {
                entry.packedIntsVersion = meta.ReadVInt();
            }
            numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            var entry = new BinaryEntry
            {
                offset = meta.ReadLong(),
                numBytes = meta.ReadLong(),
                missingOffset = meta.ReadLong()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.minLength = meta.ReadVInt();
            entry.maxLength = meta.ReadVInt();
            if (entry.minLength != entry.maxLength)
            {
                entry.packedIntsVersion = meta.ReadVInt();
                entry.blockSize = meta.ReadVInt();
            }
            binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            var entry = new FSTEntry
            {
                offset = meta.ReadLong(),
                numOrds = meta.ReadVLong()
            };
            fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
    IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
{
    _termComp = termComp;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        context);

    var success = false;
    try
    {
        _version = ReadHeader(_input);
        if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(_input);
        }

        indexInterval = _input.ReadInt();
        if (indexInterval < 1)
        {
            throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input));
        }

        _indexDivisor = indexDivisor;
        if (indexDivisor < 0)
        {
            _totalIndexInterval = indexInterval;
        }
        else
        {
            // In case terms index gets loaded, later, on demand
            _totalIndexInterval = indexInterval * indexDivisor;
        }

        Debug.Assert(_totalIndexInterval > 0);

        SeekDir(_input, _dirOffset);

        // Read directory
        int numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input));
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = _input.ReadVInt();
            int numIndexTerms = _input.ReadVInt();
            if (numIndexTerms < 0)
            {
                throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input));
            }

            long termsStart = _input.ReadVLong();
            long indexStart = _input.ReadVLong();
            long packedIndexStart = _input.ReadVLong();
            long packedOffsetsStart = _input.ReadVLong();
            if (packedIndexStart < indexStart)
            {
                throw new CorruptIndexException(
                    String.Format(
                        "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                        packedIndexStart, indexStart, numIndexTerms, _input));
            }

            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

            try
            {
                _fields.Add(fieldInfo,
                    new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input));
            }
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(_input);
        }
        if (indexDivisor > 0)
        {
            _input.Dispose();
            _input = null;
            if (success)
            {
                _indexLoaded = true;
            }
            _termBytesReader = _termBytes.Freeze(true);
        }
    }
}
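For scale, assuming the usual semantics of these parameters: with an on-disk indexInterval of 128 and an indexDivisor of 2, _totalIndexInterval becomes 256, meaning only every 256th term is kept in the in-memory terms index, halving its RAM footprint at the cost of longer seek scans; a divisor of -1 defers loading the index entirely.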
public virtual void Get(int docNum, System.String field, TermVectorMapper mapper)
{
    if (tvx != null)
    {
        int fieldNumber = fieldInfos.FieldNumber(field);

        // We need to account for the FORMAT_SIZE when seeking in the tvx.
        // We don't need to do this in other seeks because we already have the
        // file pointer that was written in another file.
        SeekTvx(docNum);
        //System.out.println("TVX Pointer: " + tvx.getFilePointer());
        long tvdPosition = tvx.ReadLong();

        tvd.Seek(tvdPosition);
        int fieldCount = tvd.ReadVInt();
        //System.out.println("Num Fields: " + fieldCount);

        // There are only a few fields per document. We opt for a full scan
        // rather than requiring that they be ordered. We need to read through
        // all of the fields anyway to get to the tvf pointers.
        int number = 0;
        int found = -1;
        for (int i = 0; i < fieldCount; i++)
        {
            if (format >= FORMAT_VERSION)
            {
                number = tvd.ReadVInt();
            }
            else
            {
                number += tvd.ReadVInt();
            }

            if (number == fieldNumber)
            {
                found = i;
            }
        }

        // If found is still -1, the field, although valid in the segment,
        // was not present in this document.
        if (found != -1)
        {
            // Compute position in the tvf file
            long position;
            if (format >= FORMAT_VERSION2)
            {
                position = tvx.ReadLong();
            }
            else
            {
                position = tvd.ReadVLong();
            }
            for (int i = 1; i <= found; i++)
            {
                position += tvd.ReadVLong();
            }

            mapper.SetDocumentNumber(docNum);
            ReadTermVector(field, position, mapper);
        }
        else
        {
            //System.out.println("Fieldable not found");
        }
    }
    else
    {
        //System.out.println("No tvx file");
    }
}