public void Read(IndexInput input, FieldInfos fieldInfos, IState state)
{
    this.term = null; // invalidate cache
    int start = input.ReadVInt(state);
    int length = input.ReadVInt(state);
    int totalLength = start + length;
    if (preUTF8Strings)
    {
        text.SetLength(totalLength);
        input.ReadChars(text.result, start, length, state);
    }
    else
    {
        if (dirty)
        {
            // Fully convert all bytes since bytes is dirty
            UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length, state);
            UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
            dirty = false;
        }
        else
        {
            // Incrementally convert only the UTF8 bytes that are new:
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length, state);
            UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
        }
    }
    this.field = fieldInfos.FieldName(input.ReadVInt(state));
}
protected internal override int ReadSkipData(int level, IndexInput skipStream, IState state)
{
    int delta;
    if (currentFieldStoresPayloads)
    {
        // The current field stores payloads. If the doc delta is odd then we
        // have to read the current payload length, because it differs from
        // the length of the previous payload.
        delta = skipStream.ReadVInt(state);
        if ((delta & 1) != 0)
        {
            payloadLength[level] = skipStream.ReadVInt(state);
        }
        delta = Number.URShift(delta, 1);
    }
    else
    {
        delta = skipStream.ReadVInt(state);
    }
    freqPointer[level] += skipStream.ReadVInt(state);
    proxPointer[level] += skipStream.ReadVInt(state);
    return delta;
}
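// A minimal writer-side sketch (not the actual Lucene writer) of the encoding that
// ReadSkipData above reverses: when the field stores payloads, the doc delta is
// left-shifted one bit and the low bit flags "payload length changed, a VInt length
// follows". The names WriteSkipData, skipBuffer, and lastPayloadLength are hypothetical.
private void WriteSkipData(IndexOutput skipBuffer, int docDelta, int payloadLength, int lastPayloadLength)
{
    if (payloadLength != lastPayloadLength)
    {
        skipBuffer.WriteVInt((docDelta << 1) | 1); // low bit set: new payload length follows
        skipBuffer.WriteVInt(payloadLength);
    }
    else
    {
        skipBuffer.WriteVInt(docDelta << 1);       // low bit clear: previous length still applies
    }
}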
public /*internal*/ Document Doc(int n, FieldSelector fieldSelector, IState state)
{
    SeekIndex(n, state);
    long position = indexStream.ReadLong(state);
    fieldsStream.Seek(position, state);

    var doc = new Document();
    int numFields = fieldsStream.ReadVInt(state);
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt(state);
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
        FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

        byte bits = fieldsStream.ReadByte(state);
        System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        System.Diagnostics.Debug.Assert(
            !compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS),
            "compressed fields are only allowed in indexes of version <= 2.9");
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
        bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;

        //TODO: Find an alternative approach here if this list continues to grow beyond the
        //list of 5 or 6 currently here. See Lucene 762 for discussion
        if (acceptField.Equals(FieldSelectorResult.LOAD))
        {
            AddField(doc, fi, binary, compressed, tokenize, state);
        }
        else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
        {
            AddField(doc, fi, binary, compressed, tokenize, state);
            break; // Get out of this loop
        }
        else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
        {
            AddFieldLazy(doc, fi, binary, compressed, tokenize, state);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE))
        {
            SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed, state), state);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
        {
            AddFieldSize(doc, fi, binary, compressed, state);
            break;
        }
        else
        {
            SkipField(binary, compressed, state);
        }
    }
    return doc;
}
private void Read(IndexInput input, String fileName, IState state)
{
    int firstInt = input.ReadVInt(state);
    if (firstInt < 0)
    {
        // This is a real format
        format = firstInt;
    }
    else
    {
        format = FORMAT_PRE;
    }

    if (format != FORMAT_PRE && format != FORMAT_START)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    int size;
    if (format == FORMAT_PRE)
    {
        size = firstInt;
    }
    else
    {
        size = input.ReadVInt(state); // read in the size
    }

    for (int i = 0; i < size; i++)
    {
        String name = StringHelper.Intern(input.ReadString(state));
        byte bits = input.ReadByte(state);
        bool isIndexed = (bits & IS_INDEXED) != 0;
        bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
        bool omitNorms = (bits & OMIT_NORMS) != 0;
        bool storePayloads = (bits & STORE_PAYLOADS) != 0;
        bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

        AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }

    if (input.FilePointer(state) != input.Length(state))
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer(state) + " vs size " + input.Length(state));
    }
}
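// For reference, a sketch of the single-byte flag layout decoded above. These hex values
// match the historical Lucene FieldInfos constants as best I recall; treat them as
// illustrative assumptions, not a normative definition.
internal const byte IS_INDEXED = 0x1;
internal const byte STORE_TERMVECTOR = 0x2;
internal const byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
internal const byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
internal const byte OMIT_NORMS = 0x10;
internal const byte STORE_PAYLOADS = 0x20;
internal const byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;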
public virtual void TestCloneClose()
{
    MMapDirectory mmapDir = new MMapDirectory(CreateTempDir("testCloneClose"));
    IndexOutput io = mmapDir.CreateOutput("bytes", NewIOContext(Random()));
    io.WriteVInt(5);
    io.Dispose();

    IndexInput one = mmapDir.OpenInput("bytes", IOContext.DEFAULT);
    IndexInput two = (IndexInput)one.Clone();
    IndexInput three = (IndexInput)two.Clone(); // clone of clone
    two.Dispose();
    Assert.AreEqual(5, one.ReadVInt());
    try
    {
        two.ReadVInt();
        Assert.Fail("Must throw AlreadyClosedException");
    }
    catch (AlreadyClosedException)
    {
        // pass
    }
    Assert.AreEqual(5, three.ReadVInt());
    one.Dispose();
    three.Dispose();
    mmapDir.Dispose();
}
private int ReadDeltaPosition(IState state)
{
    int delta = proxStream.ReadVInt(state);
    if (currentFieldStoresPayloads)
    {
        // If the current field stores payloads then the position delta is
        // shifted one bit to the left; if the LSB is set, we have to read
        // the current payload length.
        if ((delta & 1) != 0)
        {
            payloadLength = proxStream.ReadVInt(state);
        }
        delta = Number.URShift(delta, 1);
        needToLoadPayload = true;
    }
    return delta;
}
/// <summary>Increments the enumeration to the next element. True if one exists.</summary>
public override bool Next(IState state)
{
    if (position++ >= size - 1)
    {
        prevBuffer.Set(termBuffer);
        termBuffer.Reset();
        return false;
    }
    prevBuffer.Set(termBuffer);
    termBuffer.Read(input, fieldInfos, state);

    termInfo.docFreq = input.ReadVInt(state);       // read doc freq
    termInfo.freqPointer += input.ReadVLong(state); // read freq pointer
    termInfo.proxPointer += input.ReadVLong(state); // read prox pointer

    if (format == -1)
    {
        // Just read skipOffset in order to increment the file pointer;
        // the value is never used since skipTo is switched off.
        if (!isIndex)
        {
            if (termInfo.docFreq > formatM1SkipInterval)
            {
                termInfo.skipOffset = input.ReadVInt(state);
            }
        }
    }
    else
    {
        if (termInfo.docFreq >= skipInterval)
        {
            termInfo.skipOffset = input.ReadVInt(state);
        }
    }

    if (isIndex)
    {
        indexPointer += input.ReadVLong(state); // read index pointer
    }

    return true;
}
public CompoundFileReader(Directory dir, System.String name, int readBufferSize, IState state)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        stream = dir.OpenInput(name, readBufferSize, state);

        // read the directory and init files
        int count = stream.ReadVInt(state);
        FileEntry entry = null;
        for (int i = 0; i < count; i++)
        {
            long offset = stream.ReadLong(state);
            System.String id = stream.ReadString(state);

            if (entry != null)
            {
                // set length of the previous entry
                entry.length = offset - entry.offset;
            }

            entry = new FileEntry { offset = offset };
            entries[id] = entry;
        }

        // set the length of the final entry
        if (entry != null)
        {
            entry.length = stream.Length(state) - entry.offset;
        }

        success = true;
    }
    finally
    {
        if (!success && (stream != null))
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException)
            {
            }
        }
    }
}
public virtual bool Next(IState state)
{
    while (true)
    {
        if (count == df)
        {
            return false;
        }

        int docCode = freqStream.ReadVInt(state);

        if (currentFieldOmitTermFreqAndPositions)
        {
            doc += docCode;
            freq = 1;
        }
        else
        {
            doc += Number.URShift(docCode, 1);     // shift off low bit
            if ((docCode & 1) != 0)                // if low bit is set
            {
                freq = 1;                          // freq is one
            }
            else
            {
                freq = freqStream.ReadVInt(state); // else read freq
            }
        }

        count++;

        if (deletedDocs == null || !deletedDocs.Get(doc))
        {
            break;
        }
        SkippingDoc();
    }
    return true;
}
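// A minimal writer-side sketch of the doc/freq encoding that Next() above decodes: the doc
// delta is left-shifted one bit, and a set low bit means "freq == 1, no VInt follows".
// The method and parameter names are hypothetical.
private void WriteDocAndFreq(IndexOutput freqOut, int docDelta, int freq)
{
    if (freq == 1)
    {
        freqOut.WriteVInt((docDelta << 1) | 1); // low bit set: implicit freq of 1
    }
    else
    {
        freqOut.WriteVInt(docDelta << 1);       // low bit clear: explicit freq follows
        freqOut.WriteVInt(freq);
    }
}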
/// <summary>read as a d-gaps list </summary>
private void ReadDgaps(IndexInput input, IState state)
{
    size = input.ReadInt(state);      // (re)read size
    count = input.ReadInt(state);     // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0)
    {
        last += input.ReadVInt(state);
        bits[last] = input.ReadByte(state);
        n -= BYTE_COUNTS[bits[last] & 0xFF];
    }
}
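// A sketch (names and signature assumed, not the verbatim library writer) of the inverse
// operation: a d-gaps list stores only the non-zero bytes of the bit vector, each as a
// VInt gap from the previous non-zero byte's index followed by the byte itself, so a
// sparse vector stays small on disk.
private void WriteDgaps(IndexOutput output)
{
    output.WriteInt(size);  // vector size in bits
    output.WriteInt(count); // number of set bits
    int last = 0;
    int remaining = Count();
    for (int i = 0; i < bits.Length && remaining > 0; i++)
    {
        if (bits[i] != 0)
        {
            output.WriteVInt(i - last); // gap to this non-zero byte
            output.WriteByte(bits[i]);
            last = i;
            remaining -= BYTE_COUNTS[bits[i] & 0xFF];
        }
    }
}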
public override void Init(IndexInput termsIn)
{
    version = CodecUtil.CheckHeader(termsIn, PulsingPostingsWriter.CODEC,
        PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_CURRENT);

    maxPositions = termsIn.ReadVInt();
    _wrappedPostingsReader.Init(termsIn);

    if (_wrappedPostingsReader is PulsingPostingsReader || version < PulsingPostingsWriter.VERSION_META_ARRAY)
    {
        fields = null;
    }
    else
    {
        fields = new SortedDictionary<int, int>();
        String summaryFileName = IndexFileNames.SegmentFileName(segmentState.SegmentInfo.Name,
            segmentState.SegmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);
        IndexInput input = null;
        try
        {
            input = segmentState.Directory.OpenInput(summaryFileName, segmentState.Context);
            CodecUtil.CheckHeader(input, PulsingPostingsWriter.CODEC, version, PulsingPostingsWriter.VERSION_CURRENT);
            int numField = input.ReadVInt();
            for (int i = 0; i < numField; i++)
            {
                int fieldNum = input.ReadVInt();
                int longsSize = input.ReadVInt();
                fields.Add(fieldNum, longsSize);
            }
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Sole constructor. </summary>
public MonotonicBlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
{
    this.ValueCount = valueCount;
    BlockShift = PackedInts.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
    BlockMask = blockSize - 1;
    int numBlocks = PackedInts.NumBlocks(valueCount, blockSize);
    MinValues = new long[numBlocks];
    Averages = new float[numBlocks];
    SubReaders = new PackedInts.Reader[numBlocks];
    for (int i = 0; i < numBlocks; ++i)
    {
        MinValues[i] = @in.ReadVLong();
        Averages[i] = Number.IntBitsToFloat(@in.ReadInt());
        int bitsPerValue = @in.ReadVInt();
        if (bitsPerValue > 64)
        {
            throw new Exception("Corrupted");
        }
        if (bitsPerValue == 0)
        {
            SubReaders[i] = new PackedInts.NullReader(blockSize);
        }
        else
        {
            int size = (int)Math.Min(blockSize, valueCount - (long)i * blockSize);
            if (direct)
            {
                long pointer = @in.FilePointer;
                SubReaders[i] = PackedInts.GetDirectReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                @in.Seek(pointer + PackedInts.Format.PACKED.ByteCount(packedIntsVersion, size, bitsPerValue));
            }
            else
            {
                SubReaders[i] = PackedInts.GetReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
            }
        }
    }
}
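// A sketch of how a value is reconstructed from the per-block metadata read above: each
// block stores a minimum, an average slope, and bit-packed deltas, so
// value(index) = min + (long)(avg * positionInBlock) + delta. This mirrors the monotonic
// decoding scheme; treat it as illustrative rather than the exact library implementation.
public long Get(long index)
{
    int block = (int)((long)((ulong)index >> BlockShift)); // which block the index falls in
    int idx = (int)(index & BlockMask);                    // position within the block
    return MinValues[block] + (long)(Averages[block] * (long)idx) + SubReaders[block].Get(idx);
}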
public FixedIntBlockIndexInput(IndexInput @in)
{
    input = @in;
    blockSize = @in.ReadVInt();
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry { offset = meta.ReadLong(), missingOffset = meta.ReadLong() };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.format = meta.ReadByte();
            switch (entry.format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;
                default:
                    throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
            }
            if (entry.format != UNCOMPRESSED)
            {
                entry.packedIntsVersion = meta.ReadVInt();
            }
            numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            var entry = new BinaryEntry
            {
                offset = meta.ReadLong(),
                numBytes = meta.ReadLong(),
                missingOffset = meta.ReadLong()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.minLength = meta.ReadVInt();
            entry.maxLength = meta.ReadVInt();
            if (entry.minLength != entry.maxLength)
            {
                entry.packedIntsVersion = meta.ReadVInt();
                entry.blockSize = meta.ReadVInt();
            }
            binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            var entry = new FSTEntry { offset = meta.ReadLong(), numOrds = meta.ReadVLong() };
            fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
public override int ReadVInt()
{
    EnsureOpen();
    return @delegate.ReadVInt();
}
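// For context, a sketch of what ReadVInt itself decodes: a variable-length integer stored
// as 7-bit groups, low-order group first, where a set high bit means "another byte
// follows". This is a standalone re-implementation assuming only a ReadByte() source,
// not the library's own code.
public static int DecodeVInt(Func<byte> readByte)
{
    byte b = readByte();
    int value = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7)
    {
        b = readByte();
        value |= (b & 0x7F) << shift;
    }
    return value;
}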
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context)
{
    input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        new IOContext(context, true));
    this.segment = segment;
    bool success = false;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    try
    {
        version = ReadHeader(input);
        this.indexDivisor = indexDivisor;

        if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(input);
        }

        SeekDir(input, dirOffset);

        // Read directory
        int numFields = input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")");
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = input.ReadVInt();
            long indexStart = input.ReadVLong();
            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
            if (fields.ContainsKey(fieldInfo))
            {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")");
            }
            fields[fieldInfo] = new FieldIndexData(fieldInfo, indexStart);
        }
        success = true;
    }
    finally
    {
        if (indexDivisor > 0)
        {
            input.Dispose();
            input = null;
            if (success)
            {
                indexLoaded = true;
            }
        }
    }
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
        }

        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry { Offset = meta.ReadLong(), Format = (sbyte)meta.ReadByte() };
            switch (entry.Format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;
                default:
                    throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
            }
            if (entry.Format != UNCOMPRESSED)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
            }
            Numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Offset = meta.ReadLong();
            entry.NumBytes = meta.ReadLong();
            entry.MinLength = meta.ReadVInt();
            entry.MaxLength = meta.ReadVInt();
            if (entry.MinLength != entry.MaxLength)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize = meta.ReadVInt();
            }
            Binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            FSTEntry entry = new FSTEntry();
            entry.Offset = meta.ReadLong();
            entry.NumOrds = meta.ReadVLong();
            Fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
/// <summary> </summary>
/// <param name="field">The field to read in</param>
/// <param name="tvfPointer">The pointer within the tvf file where we should start reading</param>
/// <param name="mapper">The mapper used to map the TermVector</param>
/// <throws> IOException </throws>
private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper, IState state)
{
    // Now read the data from the specified position.
    // We don't need to offset by the FORMAT here since the pointer already includes the offset.
    tvf.Seek(tvfPointer, state);

    int numTerms = tvf.ReadVInt(state);
    // If no terms, return a constant empty termvector. However, this should never occur!
    if (numTerms == 0)
    {
        return;
    }

    bool storePositions;
    bool storeOffsets;

    if (format >= FORMAT_VERSION)
    {
        byte bits = tvf.ReadByte(state);
        storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
    }
    else
    {
        tvf.ReadVInt(state);
        storePositions = false;
        storeOffsets = false;
    }

    mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);

    int start = 0;
    int deltaLength = 0;
    int totalLength = 0;
    byte[] byteBuffer;
    char[] charBuffer;
    bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;

    // init the buffers
    if (preUTF8)
    {
        charBuffer = new char[10];
        byteBuffer = null;
    }
    else
    {
        charBuffer = null;
        byteBuffer = new byte[20];
    }

    for (int i = 0; i < numTerms; i++)
    {
        start = tvf.ReadVInt(state);
        deltaLength = tvf.ReadVInt(state);
        totalLength = start + deltaLength;

        System.String term;

        if (preUTF8)
        {
            // Term stored as java chars
            if (charBuffer.Length < totalLength)
            {
                char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
                Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
                charBuffer = newCharBuffer;
            }
            tvf.ReadChars(charBuffer, start, deltaLength, state);
            term = new System.String(charBuffer, 0, totalLength);
        }
        else
        {
            // Term stored as utf8 bytes
            if (byteBuffer.Length < totalLength)
            {
                byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
                Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
                byteBuffer = newByteBuffer;
            }
            tvf.ReadBytes(byteBuffer, start, deltaLength, state);
            term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
        }

        int freq = tvf.ReadVInt(state);

        int[] positions = null;
        if (storePositions)
        {
            // Read in the positions, but only if the mapper cares about them.
            if (mapper.IsIgnoringPositions == false)
            {
                positions = new int[freq];
                int prevPosition = 0;
                for (int j = 0; j < freq; j++)
                {
                    positions[j] = prevPosition + tvf.ReadVInt(state);
                    prevPosition = positions[j];
                }
            }
            else
            {
                // We need to skip over the positions. Since these are VInts,
                // there is no way to know in advance how far to skip.
                for (int j = 0; j < freq; j++)
                {
                    tvf.ReadVInt(state);
                }
            }
        }

        TermVectorOffsetInfo[] offsets = null;
        if (storeOffsets)
        {
            // Does the mapper even care about offsets?
            if (mapper.IsIgnoringOffsets == false)
            {
                offsets = new TermVectorOffsetInfo[freq];
                int prevOffset = 0;
                for (int j = 0; j < freq; j++)
                {
                    int startOffset = prevOffset + tvf.ReadVInt(state);
                    int endOffset = startOffset + tvf.ReadVInt(state);
                    offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
                    prevOffset = endOffset;
                }
            }
            else
            {
                for (int j = 0; j < freq; j++)
                {
                    tvf.ReadVInt(state);
                    tvf.ReadVInt(state);
                }
            }
        }

        mapper.Map(term, freq, offsets, positions);
    }
}
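// A small illustration (hypothetical values) of the start/deltaLength prefix compression
// decoded above: each term stores how many leading chars it shares with the previous term
// (start) plus only its new suffix (deltaLength), so the buffer is reused across terms.
//
//   previous term: "apple"
//   on disk:       start = 4, deltaLength = 1, suffix = "y"
//   decoded term:  "apply"  (keep charBuffer[0..4), read 1 char at offset 4)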
/// <summary>
/// Skip the next block of data.
/// </summary>
/// <param name="in"> the input where to read data </param>
/// <exception cref="IOException"> If there is a low-level I/O error </exception>
public void SkipBlock(IndexInput @in)
{
    int numBits = @in.ReadByte();
    if (numBits == ALL_VALUES_EQUAL)
    {
        @in.ReadVInt();
        return;
    }
    Debug.Assert(numBits > 0 && numBits <= 32, numBits.ToString());
    int encodedSize = EncodedSizes[numBits];
    @in.Seek(@in.FilePointer + encodedSize);
}
/// <summary>
/// Read values that have been written using variable-length encoding instead of bit-packing.
/// </summary>
internal static void ReadVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, bool indexHasFreq)
{
    if (indexHasFreq)
    {
        for (int i = 0; i < num; i++)
        {
            int code = docIn.ReadVInt();
            docBuffer[i] = (int)((uint)code >> 1);
            if ((code & 1) != 0)
            {
                freqBuffer[i] = 1;
            }
            else
            {
                freqBuffer[i] = docIn.ReadVInt();
            }
        }
    }
    else
    {
        for (int i = 0; i < num; i++)
        {
            docBuffer[i] = docIn.ReadVInt();
        }
    }
}
/// <summary>
/// Sole constructor. </summary>
public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        // Load the index into memory
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
        {
            indexStream.ReadVLong(); // the end of the data file
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
        vectorsStream = d.OpenInput(vectorsStreamFN, context);
        string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
        int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        if (version != version2)
        {
            throw new Exception("Version mismatch between stored fields index and data: " + version + " != " + version2);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.FilePointer);

        packedIntsVersion = vectorsStream.ReadVInt();
        chunkSize = vectorsStream.ReadVInt();
        decompressor = compressionMode.NewDecompressor();
        this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
private static IDictionary<string, FileEntry> ReadLegacyEntries(IndexInput stream, int firstInt)
{
    IDictionary<string, FileEntry> entries = new Dictionary<string, FileEntry>();
    int count;
    bool stripSegmentName;
    if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION)
    {
        if (firstInt < CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX)
        {
            throw new CorruptIndexException("Incompatible format version: " + firstInt + " expected >= " + CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX + " (resource: " + stream + ")");
        }
        // It's a post-3.1 index, read the count.
        count = stream.ReadVInt();
        stripSegmentName = false;
    }
    else
    {
        count = firstInt;
        stripSegmentName = true;
    }

    // read the directory and init files
    long streamLength = stream.Length();
    FileEntry entry = null;
    for (int i = 0; i < count; i++)
    {
        long offset = stream.ReadLong();
        if (offset < 0 || offset > streamLength)
        {
            throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
        }
        string id = stream.ReadString();

        if (stripSegmentName)
        {
            // Fix the id to not include the segment names. This is relevant for
            // pre-3.1 indexes.
            id = IndexFileNames.StripSegmentName(id);
        }

        if (entry != null)
        {
            // set length of the previous entry
            entry.Length = offset - entry.Offset;
        }

        entry = new FileEntry();
        entry.Offset = offset;

        // Note: assigning through the indexer would silently overwrite a duplicate id,
        // so check for an existing entry first. (The Java original relied on Map.put
        // returning the previous value; the straight indexer port made this check dead code.)
        if (entries.ContainsKey(id))
        {
            throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + stream);
        }
        entries[id] = entry;
    }

    // set the length of the final entry
    if (entry != null)
    {
        entry.Length = streamLength - entry.Offset;
    }

    return entries;
}
private void ReadFields(IndexInput meta)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            numerics[fieldNumber] = ReadNumericEntry(meta);
        }
        else if (fieldType == BYTES)
        {
            binaries[fieldNumber] = ReadBinaryEntry(meta);
        }
        else if (fieldType == SORTED)
        {
            sorteds[fieldNumber] = ReadSortedEntry(meta);
        }
        else if (fieldType == SORTED_SET)
        {
            sortedSets[fieldNumber] = ReadSortedSetEntry(meta);
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
/// <summary>
/// Read the next block of data (<code>For</code> format).
/// </summary>
/// <param name="in"> the input to use to read data </param>
/// <param name="encoded"> a buffer that can be used to store encoded data </param>
/// <param name="decoded"> where to write decoded data </param>
/// <exception cref="IOException"> If there is a low-level I/O error </exception>
public void ReadBlock(IndexInput @in, sbyte[] encoded, int[] decoded)
{
    int numBits = @in.ReadByte();
    Debug.Assert(numBits <= 32, numBits.ToString());

    if (numBits == ALL_VALUES_EQUAL)
    {
        int value = @in.ReadVInt();
        CollectionsHelper.Fill(decoded, 0, Lucene41PostingsFormat.BLOCK_SIZE, value);
        return;
    }

    int encodedSize = EncodedSizes[numBits];
    @in.ReadBytes(encoded, 0, encodedSize);

    PackedInts.Decoder decoder = Decoders[numBits];
    int iters = Iterations[numBits];
    Debug.Assert(iters * decoder.ByteValueCount() >= Lucene41PostingsFormat.BLOCK_SIZE);
    decoder.Decode(encoded, 0, decoded, 0, iters);
}
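// A writer-side sketch of the block layout that ReadBlock and SkipBlock consume: one
// header byte carrying the bit width, then either a single VInt (when every value in the
// block is identical) or the bit-packed payload. Encoders, BitsRequired, and AllEqual
// mirror the decode-side tables above but are assumptions here, not the verbatim writer.
private void WriteBlock(int[] values, sbyte[] encoded, IndexOutput @out)
{
    if (AllEqual(values))
    {
        @out.WriteByte((byte)ALL_VALUES_EQUAL); // sentinel bit width
        @out.WriteVInt(values[0]);              // the single repeated value
        return;
    }
    int numBits = BitsRequired(values); // widest value in the block decides the width
    int iters = Iterations[numBits];
    int encodedSize = EncodedSizes[numBits];
    @out.WriteByte((byte)numBits);
    Encoders[numBits].Encode(values, 0, encoded, 0, iters); // bit-pack the block
    @out.WriteBytes((byte[])(Array)encoded, encodedSize);   // raw packed bytes
}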
/// <summary>
/// Helper method that reads CFS entries from an input stream </summary>
private static IDictionary<string, FileEntry> ReadEntries(IndexInputSlicer handle, Directory dir, string name)
{
    System.IO.IOException priorE = null;
    IndexInput stream = null;
    ChecksumIndexInput entriesStream = null;
    // read the first VInt. If it is negative, it's the version number
    // otherwise it's the count (pre-3.1 indexes)
    try
    {
        IDictionary<string, FileEntry> mapping;
        stream = handle.OpenFullSlice();
        int firstInt = stream.ReadVInt();
        // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
        // and separate norms/etc are outside of cfs.
        if (firstInt == CODEC_MAGIC_BYTE1)
        {
            byte secondByte = stream.ReadByte();
            byte thirdByte = stream.ReadByte();
            byte fourthByte = stream.ReadByte();
            if (secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4)
            {
                throw new CorruptIndexException("Illegal/impossible header for CFS file: " + secondByte + "," + thirdByte + "," + fourthByte);
            }

            int version = CodecUtil.CheckHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
            string entriesFileName = IndexFileNames.SegmentFileName(IndexFileNames.StripExtension(name), "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
            entriesStream = dir.OpenChecksumInput(entriesFileName, IOContext.READONCE);
            CodecUtil.CheckHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
            int numEntries = entriesStream.ReadVInt();
            mapping = new Dictionary<string, FileEntry>(numEntries);
            for (int i = 0; i < numEntries; i++)
            {
                FileEntry fileEntry = new FileEntry();
                string id = entriesStream.ReadString();
                // If the key was already present
                if (mapping.ContainsKey(id))
                {
                    throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + entriesStream);
                }
                else
                {
                    mapping[id] = fileEntry;
                }
                fileEntry.Offset = entriesStream.ReadLong();
                fileEntry.Length = entriesStream.ReadLong();
            }
            if (version >= CompoundFileWriter.VERSION_CHECKSUM)
            {
                CodecUtil.CheckFooter(entriesStream);
            }
            else
            {
                CodecUtil.CheckEOF(entriesStream);
            }
        }
        else
        {
            // TODO remove once 3.x is not supported anymore
            mapping = ReadLegacyEntries(stream, firstInt);
        }
        return mapping;
    }
    catch (System.IO.IOException ioe)
    {
        priorE = ioe;
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(priorE, stream, entriesStream);
    }
    // this is needed until Java 7's real try-with-resources:
    throw new InvalidOperationException("impossible to get here");
}
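// For orientation, the entries-table layout that ReadEntries decodes in the post-3.1
// branch (a sketch reconstructed from the reads above, not a normative spec):
//
//   .cfe file: codec header
//              numEntries            (VInt)
//              numEntries x { id     (string)
//                             offset (long, into the .cfs data file)
//                             length (long) }
//              footer/EOF            (checksummed from VERSION_CHECKSUM on)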
public override void Init(IndexInput termsIn)
{
    // Make sure we are talking to the matching postings writer
    CodecUtil.CheckHeader(termsIn, Lucene41PostingsWriter.TERMS_CODEC, Lucene41PostingsWriter.VERSION_START, Lucene41PostingsWriter.VERSION_CURRENT);
    int indexBlockSize = termsIn.ReadVInt();
    if (indexBlockSize != Lucene41PostingsFormat.BLOCK_SIZE)
    {
        throw new InvalidOperationException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + Lucene41PostingsFormat.BLOCK_SIZE + ")");
    }
}
public virtual void Get(int docNum, System.String field, TermVectorMapper mapper, IState state)
{
    if (tvx != null)
    {
        int fieldNumber = fieldInfos.FieldNumber(field);

        // We need to account for the FORMAT_SIZE when seeking in the tvx.
        // We don't need to do this in other seeks because we already have the
        // file pointer that was written in another file.
        SeekTvx(docNum, state);
        //System.out.println("TVX Pointer: " + tvx.getFilePointer());
        long tvdPosition = tvx.ReadLong(state);

        tvd.Seek(tvdPosition, state);
        int fieldCount = tvd.ReadVInt(state);
        //System.out.println("Num Fields: " + fieldCount);

        // There are only a few fields per document. We opt for a full scan
        // rather than requiring that they be ordered. We need to read through
        // all of the fields anyway to get to the tvf pointers.
        int number = 0;
        int found = -1;
        for (int i = 0; i < fieldCount; i++)
        {
            if (format >= FORMAT_VERSION)
            {
                number = tvd.ReadVInt(state);
            }
            else
            {
                number += tvd.ReadVInt(state);
            }

            if (number == fieldNumber)
            {
                found = i;
            }
        }

        // If found == -1, this field, although valid in the segment, was not
        // found in this document.
        if (found != -1)
        {
            // Compute position in the tvf file
            long position;
            if (format >= FORMAT_VERSION2)
            {
                position = tvx.ReadLong(state);
            }
            else
            {
                position = tvd.ReadVLong(state);
            }
            for (int i = 1; i <= found; i++)
            {
                position += tvd.ReadVLong(state);
            }

            mapper.SetDocumentNumber(docNum);
            ReadTermVector(field, position, mapper, state);
        }
        else
        {
            // field not found in this document
        }
    }
    else
    {
        // no tvx file: no term vectors were stored
    }
}