/// <summary>Increments the enumeration to the next element. True if one exists.</summary> public override bool Next() { if (position++ >= size - 1) { prevBuffer.Set(termBuffer); termBuffer.Reset(); return(false); } prevBuffer.Set(termBuffer); termBuffer.Read(input, fieldInfos); termInfo.docFreq = input.ReadVInt(); // read doc freq termInfo.freqPointer += input.ReadVLong(); // read freq pointer termInfo.proxPointer += input.ReadVLong(); // read prox pointer if (format == -1) { // just read skipOffset in order to increment file pointer; // value is never used since skipTo is switched off if (!isIndex) { if (termInfo.docFreq > formatM1SkipInterval) { termInfo.skipOffset = input.ReadVInt(); } } } else { if (termInfo.docFreq >= skipInterval) { termInfo.skipOffset = input.ReadVInt(); } } if (isIndex) { indexPointer += input.ReadVLong(); // read index pointer } return(true); }
/// <summary> Retrieve the term vector for the given document and field</summary> /// <param name="docNum">The document number to retrieve the vector for /// </param> /// <param name="field">The field within the document to retrieve /// </param> /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field. /// </returns> /// <throws> IOException if there is an error reading the term vector files </throws> public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field) { // Check if no term vectors are available for this segment at all int fieldNumber = fieldInfos.FieldNumber(field); TermFreqVector result = null; if (tvx != null) { //We need to account for the FORMAT_SIZE at when seeking in the tvx //We don't need to do this in other seeks because we already have the // file pointer //that was written in another file tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE); //System.out.println("TVX Pointer: " + tvx.getFilePointer()); long position = tvx.ReadLong(); tvd.Seek(position); int fieldCount = tvd.ReadVInt(); //System.out.println("Num Fields: " + fieldCount); // There are only a few fields per document. We opt for a full scan // rather then requiring that they be ordered. We need to read through // all of the fields anyway to get to the tvf pointers. int number = 0; int found = -1; for (int i = 0; i < fieldCount; i++) { if (tvdFormat == TermVectorsWriter.FORMAT_VERSION) { number = tvd.ReadVInt(); } else { number += tvd.ReadVInt(); } if (number == fieldNumber) { found = i; } } // This field, although valid in the segment, was not found in this // document if (found != -1) { // Compute position in the tvf file position = 0; for (int i = 0; i <= found; i++) { position += tvd.ReadVLong(); } result = ReadTermVector(field, position); } else { //System.out.println("Field not found"); } } else { //System.out.println("No tvx file"); } return(result); }
public virtual void Get(int docNum, System.String field, TermVectorMapper mapper) { if (tvx != null) { int fieldNumber = fieldInfos.FieldNumber(field); //We need to account for the FORMAT_SIZE at when seeking in the tvx //We don't need to do this in other seeks because we already have the // file pointer //that was written in another file SeekTvx(docNum); //System.out.println("TVX Pointer: " + tvx.getFilePointer()); long tvdPosition = tvx.ReadLong(); tvd.Seek(tvdPosition); int fieldCount = tvd.ReadVInt(); //System.out.println("Num Fields: " + fieldCount); // There are only a few fields per document. We opt for a full scan // rather then requiring that they be ordered. We need to read through // all of the fields anyway to get to the tvf pointers. int number = 0; int found = -1; for (int i = 0; i < fieldCount; i++) { if (format >= FORMAT_VERSION) { number = tvd.ReadVInt(); } else { number += tvd.ReadVInt(); } if (number == fieldNumber) { found = i; } } // This field, although valid in the segment, was not found in this // document if (found != -1) { // Compute position in the tvf file long position; if (format >= FORMAT_VERSION2) { position = tvx.ReadLong(); } else { position = tvd.ReadVLong(); } for (int i = 1; i <= found; i++) { position += tvd.ReadVLong(); } mapper.SetDocumentNumber(docNum); ReadTermVector(field, position, mapper); } else { //System.out.println("Fieldable not found"); } } else { //System.out.println("No tvx file"); } }
internal readonly PackedInts.Reader[] StartPointersDeltas; // delta from the avg #endregion Fields #region Constructors // It is the responsibility of the caller to close fieldsIndexIn after this constructor // has been called internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) { MaxDoc = si.DocCount; int[] docBases = new int[16]; long[] startPointers = new long[16]; int[] avgChunkDocs = new int[16]; long[] avgChunkSizes = new long[16]; PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16]; PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16]; int packedIntsVersion = fieldsIndexIn.ReadVInt(); int blockCount = 0; for (; ; ) { int numChunks = fieldsIndexIn.ReadVInt(); if (numChunks == 0) { break; } if (blockCount == docBases.Length) { int newSize = ArrayUtil.Oversize(blockCount + 1, 8); docBases = Arrays.CopyOf(docBases, newSize); startPointers = Arrays.CopyOf(startPointers, newSize); avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize); avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize); docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize); startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize); } // doc bases docBases[blockCount] = fieldsIndexIn.ReadVInt(); avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt(); int bitsPerDocBase = fieldsIndexIn.ReadVInt(); if (bitsPerDocBase > 32) { throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")"); } docBasesDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase); // start pointers startPointers[blockCount] = fieldsIndexIn.ReadVLong(); avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong(); int bitsPerStartPointer = fieldsIndexIn.ReadVInt(); if (bitsPerStartPointer > 64) { throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")"); } startPointersDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer); ++blockCount; } this.DocBases = Arrays.CopyOf(docBases, blockCount); this.StartPointers = Arrays.CopyOf(startPointers, blockCount); this.AvgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount); this.AvgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount); this.DocBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount); this.StartPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount); }
internal static NumericEntry ReadNumericEntry(IndexInput meta) { NumericEntry entry = new NumericEntry(); entry.Format = meta.ReadVInt(); entry.MissingOffset = meta.ReadLong(); entry.PackedIntsVersion = meta.ReadVInt(); entry.Offset = meta.ReadLong(); entry.Count = meta.ReadVLong(); entry.BlockSize = meta.ReadVInt(); switch (entry.Format) { case Lucene45DocValuesConsumer.GCD_COMPRESSED: entry.MinValue = meta.ReadLong(); entry.Gcd = meta.ReadLong(); break; case Lucene45DocValuesConsumer.TABLE_COMPRESSED: if (entry.Count > int.MaxValue) { throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta); } int uniqueValues = meta.ReadVInt(); if (uniqueValues > 256) { throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta); } entry.Table = new long[uniqueValues]; for (int i = 0; i < uniqueValues; ++i) { entry.Table[i] = meta.ReadLong(); } break; case Lucene45DocValuesConsumer.DELTA_COMPRESSED: break; default: throw new Exception("Unknown format: " + entry.Format + ", input=" + meta); } return entry; }
internal static BinaryEntry ReadBinaryEntry(IndexInput meta) { BinaryEntry entry = new BinaryEntry(); entry.Format = meta.ReadVInt(); entry.MissingOffset = meta.ReadLong(); entry.MinLength = meta.ReadVInt(); entry.MaxLength = meta.ReadVInt(); entry.Count = meta.ReadVLong(); entry.Offset = meta.ReadLong(); switch (entry.Format) { case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED: break; case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED: entry.AddressInterval = meta.ReadVInt(); entry.AddressesOffset = meta.ReadLong(); entry.PackedIntsVersion = meta.ReadVInt(); entry.BlockSize = meta.ReadVInt(); break; case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED: entry.AddressesOffset = meta.ReadLong(); entry.PackedIntsVersion = meta.ReadVInt(); entry.BlockSize = meta.ReadVInt(); break; default: throw new Exception("Unknown format: " + entry.Format + ", input=" + meta); } return entry; }