/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte)input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); if (format <= SegmentInfos.FORMAT_DEL_COUNT) { delCount = input.ReadInt(); System.Diagnostics.Debug.Assert(delCount <= docCount); } else { delCount = -1; } if (format <= SegmentInfos.FORMAT_HAS_PROX) { hasProx = input.ReadByte() == 1; } else { hasProx = true; } } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte)(CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreIsCompoundFile = false; docStoreSegment = null; delCount = -1; hasProx = true; } }
public /*internal*/ Document Doc(int n) { indexStream.Seek(n * 8L); long position = indexStream.ReadLong(); fieldsStream.Seek(position); Document doc = new Document(); int numFields = fieldsStream.ReadVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.ReadVInt(); FieldInfo fi = fieldInfos.FieldInfo(fieldNumber); byte bits = fieldsStream.ReadByte(); bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) { byte[] b = new byte[fieldsStream.ReadVInt()]; fieldsStream.ReadBytes(b, 0, b.Length); if (compressed) { doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS)); } else { doc.Add(new Field(fi.name, b, Field.Store.YES)); } } else { Field.Index index; Field.Store store = Field.Store.YES; if (fi.isIndexed && tokenize) { index = Field.Index.TOKENIZED; } else if (fi.isIndexed && !tokenize) { index = Field.Index.UN_TOKENIZED; } else { index = Field.Index.NO; } Field.TermVector termVector = null; if (fi.storeTermVector) { if (fi.storeOffsetWithTermVector) { if (fi.storePositionWithTermVector) { termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; } else { termVector = Field.TermVector.WITH_OFFSETS; } } else if (fi.storePositionWithTermVector) { termVector = Field.TermVector.WITH_POSITIONS; } else { termVector = Field.TermVector.YES; } } else { termVector = Field.TermVector.NO; } if (compressed) { store = Field.Store.COMPRESS; byte[] b = new byte[fieldsStream.ReadVInt()]; fieldsStream.ReadBytes(b, 0, b.Length); Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector); f.SetOmitNorms(fi.omitNorms); doc.Add(f); } else { Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector); f.SetOmitNorms(fi.omitNorms); doc.Add(f); } } } return(doc); }
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != - 1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = - 1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte) input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); if (format <= SegmentInfos.FORMAT_DEL_COUNT) { delCount = input.ReadInt(); System.Diagnostics.Debug.Assert(delCount <= docCount); } else delCount = - 1; if (format <= SegmentInfos.FORMAT_HAS_PROX) hasProx = input.ReadByte() == 1; else hasProx = true; if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) { diagnostics = input.ReadStringStringMap(); } else { diagnostics = new System.Collections.Generic.Dictionary<string,string>(); } } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte) (CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = - 1; docStoreIsCompoundFile = false; docStoreSegment = null; delCount = - 1; hasProx = true; diagnostics = new System.Collections.Generic.Dictionary<string,string>(); } }
private void Read(IndexInput input) { int size = input.ReadVInt(); //read in the size for (int i = 0; i < size; i++) { System.String name = String.Intern(input.ReadString()); byte bits = input.ReadByte(); bool isIndexed = (bits & IS_INDEXED) != 0; bool storeTermVector = (bits & STORE_TERMVECTOR) != 0; bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; bool omitNorms = (bits & OMIT_NORMS) != 0; AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms); } }
/// <summary> </summary> /// <param name="field">The field to read in /// </param> /// <param name="tvfPointer">The pointer within the tvf file where we should start reading /// </param> /// <param name="mapper">The mapper used to map the TermVector /// </param> /// <throws> IOException </throws> private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper) { // Now read the data from specified position //We don't need to offset by the FORMAT here since the pointer already includes the offset tvf.Seek(tvfPointer); int numTerms = tvf.ReadVInt(); //System.out.println("Num Terms: " + numTerms); // If no terms - return a constant empty termvector. However, this should never occur! if (numTerms == 0) { return; } bool storePositions; bool storeOffsets; if (format >= FORMAT_VERSION) { byte bits = tvf.ReadByte(); storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; } else { tvf.ReadVInt(); storePositions = false; storeOffsets = false; } mapper.SetExpectations(field, numTerms, storeOffsets, storePositions); int start = 0; int deltaLength = 0; int totalLength = 0; byte[] byteBuffer; char[] charBuffer; bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES; // init the buffers if (preUTF8) { charBuffer = new char[10]; byteBuffer = null; } else { charBuffer = null; byteBuffer = new byte[20]; } for (int i = 0; i < numTerms; i++) { start = tvf.ReadVInt(); deltaLength = tvf.ReadVInt(); totalLength = start + deltaLength; System.String term; if (preUTF8) { // Term stored as java chars if (charBuffer.Length < totalLength) { char[] newCharBuffer = new char[(int)(1.5 * totalLength)]; Array.Copy(charBuffer, 0, newCharBuffer, 0, start); charBuffer = newCharBuffer; } tvf.ReadChars(charBuffer, start, deltaLength); term = new System.String(charBuffer, 0, totalLength); } else { // Term stored as utf8 bytes if (byteBuffer.Length < totalLength) { byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)]; Array.Copy(byteBuffer, 0, newByteBuffer, 0, start); byteBuffer = newByteBuffer; } tvf.ReadBytes(byteBuffer, start, deltaLength); term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength); } int freq = tvf.ReadVInt(); int[] positions = null; if (storePositions) { //read in the positions //does the mapper even care about positions? if (mapper.IsIgnoringPositions() == false) { positions = new int[freq]; int prevPosition = 0; for (int j = 0; j < freq; j++) { positions[j] = prevPosition + tvf.ReadVInt(); prevPosition = positions[j]; } } else { //we need to skip over the positions. Since these are VInts, I don't believe there is anyway to know for sure how far to skip // for (int j = 0; j < freq; j++) { tvf.ReadVInt(); } } } TermVectorOffsetInfo[] offsets = null; if (storeOffsets) { //does the mapper even care about offsets? if (mapper.IsIgnoringOffsets() == false) { offsets = new TermVectorOffsetInfo[freq]; int prevOffset = 0; for (int j = 0; j < freq; j++) { int startOffset = prevOffset + tvf.ReadVInt(); int endOffset = startOffset + tvf.ReadVInt(); offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset); prevOffset = endOffset; } } else { for (int j = 0; j < freq; j++) { tvf.ReadVInt(); tvf.ReadVInt(); } } } mapper.Map(term, freq, offsets, positions); } }
/// <summary>read as a d-gaps list </summary> private void ReadDgaps(IndexInput input) { size = input.ReadInt(); // (re)read size count = input.ReadInt(); // read count bits = new byte[(size >> 3) + 1]; // allocate bits int last = 0; int n = Count(); while (n > 0) { last += input.ReadVInt(); bits[last] = input.ReadByte(); n -= BYTE_COUNTS[bits[last] & 0xFF]; } }
/// <summary> /// read as a d-gaps cleared bits list </summary> private void ReadClearedDgaps(IndexInput input) { Size_Renamed = input.ReadInt(); // (re)read size Count_Renamed = input.ReadInt(); // read count Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits for (int i = 0; i < Bits.Length; ++i) { Bits[i] = 0xff; } ClearUnusedBits(); int last = 0; int numCleared = Size() - Count(); while (numCleared > 0) { last += input.ReadVInt(); Bits[last] = input.ReadByte(); numCleared -= 8 - BitUtil.BitCount(Bits[last]); Debug.Assert(numCleared >= 0 || (last == (Bits.Length - 1) && numCleared == -(8 - (Size_Renamed & 7)))); } }
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> public SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == - 1) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte) input.ReadByte(); preLockless = isCompoundFile == 0; } else { delGen = 0; normGen = null; isCompoundFile = 0; preLockless = true; hasSingleNormFile = false; } }
private void ReadFields(IndexInput meta, FieldInfos infos) { int fieldNumber = meta.ReadVInt(); while (fieldNumber != -1) { // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616 if (fieldNumber < 0) { // trickier to validate more: because we re-use for norms, because we use multiple entries // for "composite" types like sortedset, etc. throw new Exception("Invalid field number: " + fieldNumber + " (resource=" + meta + ")"); } byte type = meta.ReadByte(); if (type == Lucene45DocValuesFormat.NUMERIC) { Numerics[fieldNumber] = ReadNumericEntry(meta); } else if (type == Lucene45DocValuesFormat.BINARY) { BinaryEntry b = ReadBinaryEntry(meta); Binaries[fieldNumber] = b; } else if (type == Lucene45DocValuesFormat.SORTED) { ReadSortedField(fieldNumber, meta, infos); } else if (type == Lucene45DocValuesFormat.SORTED_SET) { SortedSetEntry ss = ReadSortedSetEntry(meta); SortedSets[fieldNumber] = ss; if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES) { ReadSortedSetFieldWithAddresses(fieldNumber, meta, infos); } else if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED) { if (meta.ReadVInt() != fieldNumber) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } if (meta.ReadByte() != Lucene45DocValuesFormat.SORTED) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } ReadSortedField(fieldNumber, meta, infos); } else { throw new Exception(); } } else { throw new Exception("invalid type: " + type + ", resource=" + meta); } fieldNumber = meta.ReadVInt(); } }
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); try { int format = input.ReadVInt(); if (format > FORMAT_MINIMUM) { throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); } if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW) { throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); } int size = input.ReadVInt(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt() : i; byte bits = input.ReadByte(); bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0; bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0; bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0; bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0; FieldInfo.IndexOptions?indexOptions; if (!isIndexed) { indexOptions = null; } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = FieldInfo.IndexOptions.DOCS_ONLY; } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0) { if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS) { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; } else { throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")"); } } else { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // LUCENE-3027: past indices were able to write // storePayloads=true when omitTFAP is also true, // which is invalid. We correct that, here: if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { storePayloads = false; } DocValuesType_e?normType = isIndexed && !omitNorms ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null; if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null) { // RW can have norms but doesn't write them normType = input.ReadByte() != 0 ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null; } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null); } if (input.FilePointer != input.Length()) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")"); } return(new FieldInfos(infos)); } finally { input.Dispose(); } }
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != - 1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = - 1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte) input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte) (CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = - 1; docStoreIsCompoundFile = false; docStoreSegment = null; } }
/// <summary> </summary> /// <param name="field">The field to read in /// </param> /// <param name="tvfPointer">The pointer within the tvf file where we should start reading /// </param> /// <param name="mapper">The mapper used to map the TermVector /// </param> /// <returns> The TermVector located at that position /// </returns> /// <throws> IOException </throws> private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper) { // Now read the data from specified position //We don't need to offset by the FORMAT here since the pointer already includes the offset tvf.Seek(tvfPointer); int numTerms = tvf.ReadVInt(); //System.out.println("Num Terms: " + numTerms); // If no terms - return a constant empty termvector. However, this should never occur! if (numTerms == 0) { return; } bool storePositions; bool storeOffsets; if (tvfFormat == FORMAT_VERSION) { byte bits = tvf.ReadByte(); storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; } else { tvf.ReadVInt(); storePositions = false; storeOffsets = false; } mapper.SetExpectations(field, numTerms, storeOffsets, storePositions); int start = 0; int deltaLength = 0; int totalLength = 0; char[] buffer = new char[10]; // init the buffer with a length of 10 character char[] previousBuffer = new char[] {}; for (int i = 0; i < numTerms; i++) { start = tvf.ReadVInt(); deltaLength = tvf.ReadVInt(); totalLength = start + deltaLength; if (buffer.Length < totalLength) { // increase buffer buffer = null; // give a hint to garbage collector buffer = new char[totalLength]; if (start > 0) { // just copy if necessary Array.Copy(previousBuffer, 0, buffer, 0, start); } } tvf.ReadChars(buffer, start, deltaLength); System.String term = new System.String(buffer, 0, totalLength); previousBuffer = buffer; int freq = tvf.ReadVInt(); int[] positions = null; if (storePositions) { //read in the positions //does the mapper even care about positions? if (mapper.IsIgnoringPositions() == false) { positions = new int[freq]; int prevPosition = 0; for (int j = 0; j < freq; j++) { positions[j] = prevPosition + tvf.ReadVInt(); prevPosition = positions[j]; } } else { //we need to skip over the positions. Since these are VInts, I don't believe there is anyway to know for sure how far to skip // for (int j = 0; j < freq; j++) { tvf.ReadVInt(); } } } TermVectorOffsetInfo[] offsets = null; if (storeOffsets) { //does the mapper even care about offsets? if (mapper.IsIgnoringOffsets() == false) { offsets = new TermVectorOffsetInfo[freq]; int prevOffset = 0; for (int j = 0; j < freq; j++) { int startOffset = prevOffset + tvf.ReadVInt(); int endOffset = startOffset + tvf.ReadVInt(); offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset); prevOffset = endOffset; } } else { for (int j = 0; j < freq; j++) { tvf.ReadVInt(); tvf.ReadVInt(); } } } mapper.Map(term, freq, offsets, positions); } }
private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input) { CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT); var header = (sbyte)input.ReadByte(); if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64) { int maxDoc = State.SegmentInfo.DocCount; var values = new long[maxDoc]; for (int i = 0; i < values.Length; i++) { values[i] = input.ReadLong(); } RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values)); return new NumericDocValuesAnonymousInnerClassHelper(values); } else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED) { long minValue = input.ReadLong(); long defaultValue = input.ReadLong(); PackedInts.Reader reader = PackedInts.GetReader(input); RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed()); return new NumericDocValuesAnonymousInnerClassHelper2(minValue, defaultValue, reader); } else { throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")"); } }
/// <summary> /// read as a d-gaps list </summary> private void ReadSetDgaps(IndexInput input) { Size_Renamed = input.ReadInt(); // (re)read size Count_Renamed = input.ReadInt(); // read count Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits int last = 0; int n = Count(); while (n > 0) { last += input.ReadVInt(); Bits[last] = input.ReadByte(); n -= BitUtil.BitCount(Bits[last]); Debug.Assert(n >= 0); } }
/// <summary> </summary> /// <param name="field">The field to read in /// </param> /// <param name="tvfPointer">The pointer within the tvf file where we should start reading /// </param> /// <returns> The TermVector located at that position /// </returns> /// <throws> IOException </throws> private SegmentTermVector ReadTermVector(System.String field, long tvfPointer) { // Now read the data from specified position //We don't need to offset by the FORMAT here since the pointer already includes the offset tvf.Seek(tvfPointer); int numTerms = tvf.ReadVInt(); //System.out.println("Num Terms: " + numTerms); // If no terms - return a constant empty termvector. However, this should never occur! if (numTerms == 0) { return(new SegmentTermVector(field, null, null)); } bool storePositions; bool storeOffsets; if (tvfFormat == TermVectorsWriter.FORMAT_VERSION) { byte bits = tvf.ReadByte(); storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0; storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0; } else { tvf.ReadVInt(); storePositions = false; storeOffsets = false; } System.String[] terms = new System.String[numTerms]; int[] termFreqs = new int[numTerms]; // we may not need these, but declare them int[][] positions = null; TermVectorOffsetInfo[][] offsets = null; if (storePositions) { positions = new int[numTerms][]; } if (storeOffsets) { offsets = new TermVectorOffsetInfo[numTerms][]; } int start = 0; int deltaLength = 0; int totalLength = 0; char[] buffer = new char[10]; // init the buffer with a length of 10 character char[] previousBuffer = new char[] {}; for (int i = 0; i < numTerms; i++) { start = tvf.ReadVInt(); deltaLength = tvf.ReadVInt(); totalLength = start + deltaLength; if (buffer.Length < totalLength) { // increase buffer buffer = null; // give a hint to garbage collector buffer = new char[totalLength]; if (start > 0) { // just copy if necessary Array.Copy(previousBuffer, 0, buffer, 0, start); } } tvf.ReadChars(buffer, start, deltaLength); terms[i] = new System.String(buffer, 0, totalLength); previousBuffer = buffer; int freq = tvf.ReadVInt(); termFreqs[i] = freq; if (storePositions) { //read in the positions int[] pos = new int[freq]; positions[i] = pos; int prevPosition = 0; for (int j = 0; j < freq; j++) { pos[j] = prevPosition + tvf.ReadVInt(); prevPosition = pos[j]; } } if (storeOffsets) { TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq]; offsets[i] = offs; int prevOffset = 0; for (int j = 0; j < freq; j++) { int startOffset = prevOffset + tvf.ReadVInt(); int endOffset = startOffset + tvf.ReadVInt(); offs[j] = new TermVectorOffsetInfo(startOffset, endOffset); prevOffset = endOffset; } } } SegmentTermVector tv; if (storePositions || storeOffsets) { tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets); } else { tv = new SegmentTermVector(field, terms, termFreqs); } return(tv); }
private void ReadSortedField(int fieldNumber, IndexInput meta, FieldInfos infos) { // sorted = binary + numeric if (meta.ReadVInt() != fieldNumber) { throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY) { throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } BinaryEntry b = ReadBinaryEntry(meta); Binaries[fieldNumber] = b; if (meta.ReadVInt() != fieldNumber) { throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC) { throw new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } NumericEntry n = ReadNumericEntry(meta); Ords[fieldNumber] = n; }
public virtual void TestRandomAccess() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp"); // Open two files IndexInput e1 = dir.OpenInput("f11"); IndexInput e2 = dir.OpenInput("f3"); IndexInput a1 = cr.OpenInput("f11"); IndexInput a2 = dir.OpenInput("f3"); // Seek the first pair e1.Seek(100); a1.Seek(100); Assert.AreEqual(100, e1.GetFilePointer()); Assert.AreEqual(100, a1.GetFilePointer()); byte be1 = e1.ReadByte(); byte ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now seek the second pair e2.Seek(1027); a2.Seek(1027); Assert.AreEqual(1027, e2.GetFilePointer()); Assert.AreEqual(1027, a2.GetFilePointer()); byte be2 = e2.ReadByte(); byte ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Now make sure the first one didn't move Assert.AreEqual(101, e1.GetFilePointer()); Assert.AreEqual(101, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now more the first one again, past the buffer length e1.Seek(1910); a1.Seek(1910); Assert.AreEqual(1910, e1.GetFilePointer()); Assert.AreEqual(1910, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now make sure the second set didn't move Assert.AreEqual(1028, e2.GetFilePointer()); Assert.AreEqual(1028, a2.GetFilePointer()); be2 = e2.ReadByte(); ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Move the second set back, again cross the buffer size e2.Seek(17); a2.Seek(17); Assert.AreEqual(17, e2.GetFilePointer()); Assert.AreEqual(17, a2.GetFilePointer()); be2 = e2.ReadByte(); ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Finally, make sure the first set didn't move // Now make sure the first one didn't move Assert.AreEqual(1911, e1.GetFilePointer()); Assert.AreEqual(1911, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); e1.Close(); e2.Close(); a1.Close(); a2.Close(); cr.Close(); }
private void ReadSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos) { // sortedset = binary + numeric (addresses) + ordIndex if (meta.ReadVInt() != fieldNumber) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } BinaryEntry b = ReadBinaryEntry(meta); Binaries[fieldNumber] = b; if (meta.ReadVInt() != fieldNumber) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } NumericEntry n1 = ReadNumericEntry(meta); Ords[fieldNumber] = n1; if (meta.ReadVInt() != fieldNumber) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC) { throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")"); } NumericEntry n2 = ReadNumericEntry(meta); OrdIndexes[fieldNumber] = n2; }
private void Read(IndexInput input, System.String fileName) { int firstInt = input.ReadVInt(); if (firstInt < 0) { // This is a real format format = firstInt; } else { format = FORMAT_PRE; } if (format != FORMAT_PRE & format != FORMAT_START) { throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\""); } int size; if (format == FORMAT_PRE) { size = firstInt; } else { size = input.ReadVInt(); //read in the size } for (int i = 0; i < size; i++) { System.String name = StringHelper.Intern(input.ReadString()); byte bits = input.ReadByte(); bool isIndexed = (bits & IS_INDEXED) != 0; bool storeTermVector = (bits & STORE_TERMVECTOR) != 0; bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; bool omitNorms = (bits & OMIT_NORMS) != 0; bool storePayloads = (bits & STORE_PAYLOADS) != 0; bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0; AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } if (input.GetFilePointer() != input.Length()) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length()); } }
public override long Get(int index) { long majorBitPos = (long)index * bitsPerValue; long elementPos = (long)((ulong)majorBitPos >> 3); try { @in.Seek(StartPointer + elementPos); int bitPos = (int)(majorBitPos & 7); // round up bits to a multiple of 8 to find total bytes needed to read int roundedBits = ((bitPos + bitsPerValue + 7) & ~7); // the number of extra bits read at the end to shift out int shiftRightBits = roundedBits - bitPos - bitsPerValue; long rawValue; switch ((int)((uint)roundedBits >> 3)) { case 1: rawValue = @in.ReadByte(); break; case 2: rawValue = @in.ReadShort(); break; case 3: rawValue = ((long)@in.ReadShort() << 8) | (@in.ReadByte() & 0xFFL); break; case 4: rawValue = @in.ReadInt(); break; case 5: rawValue = ((long)@in.ReadInt() << 8) | (@in.ReadByte() & 0xFFL); break; case 6: rawValue = ((long)@in.ReadInt() << 16) | (@in.ReadShort() & 0xFFFFL); break; case 7: rawValue = ((long)@in.ReadInt() << 24) | ((@in.ReadShort() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL); break; case 8: rawValue = @in.ReadLong(); break; case 9: // We must be very careful not to shift out relevant bits. So we account for right shift // we would normally do on return here, and reset it. rawValue = (@in.ReadLong() << (8 - shiftRightBits)) | ((int)((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits)); shiftRightBits = 0; break; default: throw new InvalidOperationException("bitsPerValue too large: " + bitsPerValue); } return(((long)((ulong)rawValue >> shiftRightBits)) & ValueMask); } catch (System.IO.IOException ioe) { throw new InvalidOperationException("failed", ioe); } }