protected override MonotonicBlockPackedReader GetAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes)
{
    data.Seek(bytes.AddressesOffset);
    return new MonotonicBlockPackedReader((IndexInput)data.Clone(), bytes.PackedIntsVersion, bytes.BlockSize, bytes.Count, true);
}

protected override MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry)
{
    data.Seek(entry.Offset);
    return new MonotonicBlockPackedReader((IndexInput)data.Clone(), entry.PackedIntsVersion, entry.BlockSize, entry.Count, true);
}
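Both getters clone the shared IndexInput before seeking, so each returned reader keeps an independent file pointer. A minimal sketch of that clone-then-seek pattern, assuming the Lucene.Net 4.8 Store API (RAMDirectory, IOContext.DEFAULT, and the object-returning Clone()); names here are illustrative:

using Lucene.Net.Store;

// Hypothetical illustration: two consumers share one open file safely.
using (Directory dir = new RAMDirectory())
{
    using (IndexOutput output = dir.CreateOutput("blob.bin", IOContext.DEFAULT))
    {
        for (byte b = 0; b < 16; b++) output.WriteByte(b);
    }

    using (IndexInput main = dir.OpenInput("blob.bin", IOContext.DEFAULT))
    {
        IndexInput clone = (IndexInput)main.Clone(); // independent position
        main.Seek(4);
        clone.Seek(12);
        byte fromMain = main.ReadByte();   // reads offset 4
        byte fromClone = clone.ReadByte(); // reads offset 12; main is unaffected
    }
}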
public override void VisitDocument(int n, StoredFieldVisitor visitor)
{
    _input.Seek(_offsets[n]);
    ReadLine();
    if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NUM));
    var numFields = ParseInt32At(SimpleTextStoredFieldsWriter.NUM.Length);

    for (var i = 0; i < numFields; i++)
    {
        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.FIELD));
        int fieldNumber = ParseInt32At(SimpleTextStoredFieldsWriter.FIELD.Length);
        FieldInfo fieldInfo = _fieldInfos.FieldInfo(fieldNumber);
        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NAME));
        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.TYPE));

        BytesRef type;
        if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_STRING, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
        {
            type = SimpleTextStoredFieldsWriter.TYPE_STRING;
        }
        else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_BINARY, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
        {
            type = SimpleTextStoredFieldsWriter.TYPE_BINARY;
        }
        else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_INT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
        {
            type = SimpleTextStoredFieldsWriter.TYPE_INT;
        }
        else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_LONG, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
        {
            type = SimpleTextStoredFieldsWriter.TYPE_LONG;
        }
        else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_FLOAT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
        {
            type = SimpleTextStoredFieldsWriter.TYPE_FLOAT;
        }
        else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_DOUBLE, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
        {
            type = SimpleTextStoredFieldsWriter.TYPE_DOUBLE;
        }
        else
        {
            throw new Exception("unknown field type");
        }

        switch (visitor.NeedsField(fieldInfo))
        {
            case StoredFieldVisitor.Status.YES:
                ReadField(type, fieldInfo, visitor);
                break;
            case StoredFieldVisitor.Status.NO:
                ReadLine();
                if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
                break;
            case StoredFieldVisitor.Status.STOP:
                return;
        }
    }
}
public override int NextPosition()
{
    if (lazyProxPointer != -1)
    {
        proxIn.Seek(lazyProxPointer);
        lazyProxPointer = -1;
    }

    if (payloadPending && payloadLength > 0)
    {
        // payload of last position was never retrieved -- skip it
        proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        payloadPending = false;
    }

    // scan over any docs that were iterated without their positions
    while (posPendingCount > freq)
    {
        int code = proxIn.ReadVInt32();
        if (storePayloads)
        {
            if ((code & 1) != 0)
            {
                // new payload length
                payloadLength = proxIn.ReadVInt32();
                if (Debugging.AssertsEnabled) Debugging.Assert(payloadLength >= 0);
            }
            if (Debugging.AssertsEnabled) Debugging.Assert(payloadLength != -1);
        }

        if (storeOffsets)
        {
            if ((proxIn.ReadVInt32() & 1) != 0)
            {
                // new offset length
                offsetLength = proxIn.ReadVInt32();
            }
        }

        if (storePayloads)
        {
            proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }

        posPendingCount--;
        position = 0;
        startOffset = 0;
        payloadPending = false;
        //System.out.println("StandardR.D&PE skipPos");
    }

    // read next position
    if (payloadPending && payloadLength > 0)
    {
        // payload wasn't retrieved for last position
        proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    }

    int code_ = proxIn.ReadVInt32();
    if (storePayloads)
    {
        if ((code_ & 1) != 0)
        {
            // new payload length
            payloadLength = proxIn.ReadVInt32();
            if (Debugging.AssertsEnabled) Debugging.Assert(payloadLength >= 0);
        }
        if (Debugging.AssertsEnabled) Debugging.Assert(payloadLength != -1);
        payloadPending = true;
        code_ = code_.TripleShift(1);
    }
    position += code_;

    if (storeOffsets)
    {
        int offsetCode = proxIn.ReadVInt32();
        if ((offsetCode & 1) != 0)
        {
            // new offset length
            offsetLength = proxIn.ReadVInt32();
        }
        startOffset += offsetCode.TripleShift(1);
    }

    posPendingCount--;
    if (Debugging.AssertsEnabled) Debugging.Assert(posPendingCount >= 0, "NextPosition() was called too many times (more than Freq times) posPendingCount={0}", posPendingCount);
    //System.out.println("StandardR.D&PE nextPos return pos=" + position);
    return position;
}
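NextPosition decodes each position as a VInt whose low bit flags a new payload length and whose remaining bits (code.TripleShift(1)) carry the position delta. A self-contained sketch of that decoding, with a hypothetical ReadVInt32 over a plain byte array:

// Decode Lucene-style VInts: 7 bits per byte, high bit = "more bytes follow".
static int ReadVInt32(byte[] buf, ref int pos)
{
    byte b = buf[pos++];
    int value = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7)
    {
        b = buf[pos++];
        value |= (b & 0x7F) << shift;
    }
    return value;
}

// code = (positionDelta << 1) | hasNewPayloadLength
byte[] prox = { 0x05, 0x03 }; // code=5 -> delta=2, flag set; then payloadLength=3
int p = 0;
int code = ReadVInt32(prox, ref p);
bool newPayloadLength = (code & 1) != 0;                             // true
int positionDelta = (int)((uint)code >> 1);                          // 2
int payloadLength = newPayloadLength ? ReadVInt32(prox, ref p) : -1; // 3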
protected override void ReadInternal(byte[] b, int offset, int length)
{
    SimOutage();
    @delegate.Seek(GetFilePointer());
    @delegate.ReadBytes(b, offset, length);
}
public override int NextDoc()
{
    bool first = true;
    _in.Seek(_nextDocStart);
    long posStart = 0;
    while (true)
    {
        long lineStart = _in.GetFilePointer();
        SimpleTextUtil.ReadLine(_in, _scratch);
        //System.out.println("NEXT DOC: " + scratch.utf8ToString());
        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _nextDocStart = lineStart;
                _in.Seek(posStart);
                return _docId;
            }
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            _tf = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            _tf = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            posStart = _in.GetFilePointer();
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
        {
            // skip
        }
        else
        {
            Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _nextDocStart = lineStart;
                _in.Seek(posStart);
                return _docId;
            }
            return _docId = NO_MORE_DOCS;
        }
    }
}
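Because the SimpleText codec stores postings as prefixed text lines, NextDoc is a forward scan that remembers the file offset of the last doc line. A rough stand-alone analog over an in-memory string (prefixes and layout simplified):

using System;
using System.IO;

string postings = "doc 3\nfreq 2\npos 0\npos 7\ndoc 9\nfreq 1\npos 4\nterm zebra\n";
using var reader = new StringReader(postings);
string line;
int docId = -1, tf = 0;
while ((line = reader.ReadLine()) != null)
{
    if (line.StartsWith("doc ", StringComparison.Ordinal))
    {
        docId = int.Parse(line.Substring(4));
    }
    else if (line.StartsWith("freq ", StringComparison.Ordinal))
    {
        tf = int.Parse(line.Substring(5));
    }
    else if (line.StartsWith("term ", StringComparison.Ordinal))
    {
        break; // next term starts; postings for this term are done
    }
    // pos/offset/payload lines are skipped here, as in NextDoc above
}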
internal virtual void SeekTvx(int docNum) { tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE); }
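SeekTvx can compute its target arithmetically because the .tvx index stores a fixed 16 bytes per document (two 8-byte pointers) after a fixed-size header, so no scanning is needed. The same scheme in miniature, with an illustrative header size:

// Fixed-stride index: each document owns BytesPerDoc bytes after a header,
// so the entry for any doc is reachable with one Seek and no scanning.
const long HeaderSize = 4;   // illustrative; the real format size depends on the codec
const long BytesPerDoc = 16; // two 8-byte pointers per document, as in .tvx

static long IndexOffset(int docNum, int docStoreOffset)
    => (docNum + docStoreOffset) * BytesPerDoc + HeaderSize;

long target = IndexOffset(10, 3); // 4 + 13 * 16 = 212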
public override void Seek(long pos) { ii.Seek(pos); }
public CoreFieldIndex(FieldIndexData outerInstance, long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms)
{
    this.termsStart = termsStart;
    termBytesStart = outerInstance.outerInstance.termBytes.Pointer;

    IndexInput clone = (IndexInput)outerInstance.outerInstance.input.Clone();
    clone.Seek(indexStart);

    // -1 is passed to mean "don't load term index", but
    // if we are then later loaded it's overwritten with
    // a real value
    Debug.Assert(outerInstance.outerInstance.indexDivisor > 0);

    this.numIndexTerms = 1 + (numIndexTerms - 1) / outerInstance.outerInstance.indexDivisor;

    Debug.Assert(this.numIndexTerms > 0, "numIndexTerms=" + numIndexTerms + " indexDivisor=" + outerInstance.outerInstance.indexDivisor);

    if (outerInstance.outerInstance.indexDivisor == 1)
    {
        // Default (load all index terms) is fast -- slurp in the images from disk:
        try
        {
            long numTermBytes = packedIndexStart - indexStart;
            outerInstance.outerInstance.termBytes.Copy(clone, numTermBytes);

            // records offsets into main terms dict file
            termsDictOffsets = PackedInt32s.GetReader(clone);
            Debug.Assert(termsDictOffsets.Count == numIndexTerms);

            // records offsets into byte[] term data
            termOffsets = PackedInt32s.GetReader(clone);
            Debug.Assert(termOffsets.Count == 1 + numIndexTerms);
        }
        finally
        {
            clone.Dispose();
        }
    }
    else
    {
        // Get packed iterators
        IndexInput clone1 = (IndexInput)outerInstance.outerInstance.input.Clone();
        IndexInput clone2 = (IndexInput)outerInstance.outerInstance.input.Clone();

        try
        {
            // Subsample the index terms
            clone1.Seek(packedIndexStart);
            PackedInt32s.IReaderIterator termsDictOffsetsIter = PackedInt32s.GetReaderIterator(clone1, PackedInt32s.DEFAULT_BUFFER_SIZE);

            clone2.Seek(packedOffsetsStart);
            PackedInt32s.IReaderIterator termOffsetsIter = PackedInt32s.GetReaderIterator(clone2, PackedInt32s.DEFAULT_BUFFER_SIZE);

            // TODO: often we can get by w/ fewer bits per
            // value, below.. .but this'd be more complex:
            // we'd have to try @ fewer bits and then grow
            // if we overflowed it.
            PackedInt32s.Mutable termsDictOffsetsM = PackedInt32s.GetMutable(this.numIndexTerms, termsDictOffsetsIter.BitsPerValue, PackedInt32s.DEFAULT);
            PackedInt32s.Mutable termOffsetsM = PackedInt32s.GetMutable(this.numIndexTerms + 1, termOffsetsIter.BitsPerValue, PackedInt32s.DEFAULT);

            termsDictOffsets = termsDictOffsetsM;
            termOffsets = termOffsetsM;

            int upto = 0;
            long termOffsetUpto = 0;

            while (upto < this.numIndexTerms)
            {
                // main file offset copies straight over
                termsDictOffsetsM.Set(upto, termsDictOffsetsIter.Next());

                termOffsetsM.Set(upto, termOffsetUpto);

                long termOffset = termOffsetsIter.Next();
                long nextTermOffset = termOffsetsIter.Next();
                int numTermBytes = (int)(nextTermOffset - termOffset);

                clone.Seek(indexStart + termOffset);
                Debug.Assert(indexStart + termOffset < clone.Length, "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.Length);
                Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length);

                outerInstance.outerInstance.termBytes.Copy(clone, numTermBytes);
                termOffsetUpto += numTermBytes;

                upto++;
                if (upto == this.numIndexTerms)
                {
                    break;
                }

                // skip terms:
                termsDictOffsetsIter.Next();
                for (int i = 0; i < outerInstance.outerInstance.indexDivisor - 2; i++)
                {
                    termOffsetsIter.Next();
                    termsDictOffsetsIter.Next();
                }
            }
            termOffsetsM.Set(upto, termOffsetUpto);
        }
        finally
        {
            clone1.Dispose();
            clone2.Dispose();
            clone.Dispose();
        }
    }
}
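When indexDivisor > 1, the constructor keeps only every Nth indexed term to trade heap for extra scanning, which is why numIndexTerms is recomputed as 1 + (n - 1) / divisor. A small sketch of that subsampling rule:

// Keep every `divisor`-th entry of a sorted term index (indices 0, d, 2d, ...).
static int[] Subsample(int[] termOrds, int divisor)
{
    int kept = 1 + (termOrds.Length - 1) / divisor; // matches numIndexTerms above
    var result = new int[kept];
    for (int i = 0; i < kept; i++)
    {
        result[i] = termOrds[i * divisor];
    }
    return result;
}

// 10 index terms, divisor 4 -> keeps ords 0, 4, 8 (3 entries).
int[] sampled = Subsample(new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 4);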
public override int NextPosition()
{
    if (LazyProxPointer != -1)
    {
        ProxIn.Seek(LazyProxPointer);
        LazyProxPointer = -1;
    }

    if (PayloadPending && PayloadLength > 0)
    {
        // payload of last position was never retrieved -- skip it
        ProxIn.Seek(ProxIn.FilePointer + PayloadLength);
        PayloadPending = false;
    }

    // scan over any docs that were iterated without their positions
    while (PosPendingCount > Freq_Renamed)
    {
        int code = ProxIn.ReadVInt();
        if (StorePayloads)
        {
            if ((code & 1) != 0)
            {
                // new payload length
                PayloadLength = ProxIn.ReadVInt();
                Debug.Assert(PayloadLength >= 0);
            }
            Debug.Assert(PayloadLength != -1);
        }

        if (StoreOffsets)
        {
            if ((ProxIn.ReadVInt() & 1) != 0)
            {
                // new offset length
                OffsetLength = ProxIn.ReadVInt();
            }
        }

        if (StorePayloads)
        {
            ProxIn.Seek(ProxIn.FilePointer + PayloadLength);
        }

        PosPendingCount--;
        Position = 0;
        StartOffset_Renamed = 0;
        PayloadPending = false;
        //System.out.println("StandardR.D&PE skipPos");
    }

    // read next position
    if (PayloadPending && PayloadLength > 0)
    {
        // payload wasn't retrieved for last position
        ProxIn.Seek(ProxIn.FilePointer + PayloadLength);
    }

    int code_ = ProxIn.ReadVInt();
    if (StorePayloads)
    {
        if ((code_ & 1) != 0)
        {
            // new payload length
            PayloadLength = ProxIn.ReadVInt();
            Debug.Assert(PayloadLength >= 0);
        }
        Debug.Assert(PayloadLength != -1);
        PayloadPending = true;
        code_ = (int)((uint)code_ >> 1);
    }
    Position += code_;

    if (StoreOffsets)
    {
        int offsetCode = ProxIn.ReadVInt();
        if ((offsetCode & 1) != 0)
        {
            // new offset length
            OffsetLength = ProxIn.ReadVInt();
        }
        StartOffset_Renamed += (int)((uint)offsetCode >> 1);
    }

    PosPendingCount--;
    Debug.Assert(PosPendingCount >= 0, "nextPosition() was called too many times (more than freq() times) posPendingCount=" + PosPendingCount);
    //System.out.println("StandardR.D&PE nextPos return pos=" + position);
    return Position;
}
public override void Seek(long pos) { _cacheDirIndexInput.Seek(pos); }
internal virtual void LoadTerms()
{
    PositiveIntOutputs posIntOutputs = PositiveIntOutputs.Singleton;
    PairOutputs<long?, long?> outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
    PairOutputs<long?, PairOutputs.Pair<long?, long?>> outputs = new PairOutputs<long?, PairOutputs.Pair<long?, long?>>(posIntOutputs, outputsInner);
    // C# has no diamond operator, so the builder's type arguments must be spelled out:
    Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>> b = new Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>>(FST.INPUT_TYPE.BYTE1, outputs);
    IndexInput @in = (IndexInput)outerInstance._input.Clone();
    @in.Seek(termsStart);

    BytesRef lastTerm = new BytesRef(10);
    long lastDocsStart = -1;
    int docFreq = 0;
    long totalTermFreq = 0;
    FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
    IntsRef scratchIntsRef = new IntsRef();

    while (true)
    {
        SimpleTextUtil.ReadLine(@in, scratch);
        if (scratch.Equals(END) || StringHelper.StartsWith(scratch, FIELD))
        {
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                sumTotalTermFreq += totalTermFreq;
            }
            break;
        }
        else if (StringHelper.StartsWith(scratch, DOC))
        {
            docFreq++;
            sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length, scratchUTF16);
            int docID = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            visitedDocs.Set(docID);
        }
        else if (StringHelper.StartsWith(scratch, FREQ))
        {
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length, scratch.Length - FREQ.Length, scratchUTF16);
            totalTermFreq += ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
        }
        else if (StringHelper.StartsWith(scratch, TERM))
        {
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef), outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
            }
            lastDocsStart = @in.FilePointer;
            int len = scratch.Length - TERM.Length;
            if (len > lastTerm.Length)
            {
                lastTerm.Grow(len);
            }
            Array.Copy(scratch.Bytes, TERM.Length, lastTerm.Bytes, 0, len);
            lastTerm.Length = len;
            docFreq = 0;
            sumTotalTermFreq += totalTermFreq;
            totalTermFreq = 0;
            termCount++;
        }
    }
    docCount = visitedDocs.Cardinality();
    fst = b.Finish();
}
public override int NextDoc()
{
    bool first = true;
    @in.Seek(nextDocStart);
    long posStart = 0;
    while (true)
    {
        long lineStart = @in.FilePointer;
        SimpleTextUtil.ReadLine(@in, scratch);
        //System.out.println("NEXT DOC: " + scratch.utf8ToString());
        if (StringHelper.StartsWith(scratch, DOC))
        {
            if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
            {
                nextDocStart = lineStart;
                @in.Seek(posStart);
                return docID_Renamed;
            }
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length, scratchUTF16);
            docID_Renamed = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            tf = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(scratch, FREQ))
        {
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length, scratch.Length - FREQ.Length, scratchUTF16);
            tf = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.Length);
            posStart = @in.FilePointer;
        }
        else if (StringHelper.StartsWith(scratch, POS))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(scratch, PAYLOAD))
        {
            // skip
        }
        else
        {
            Debug.Assert(StringHelper.StartsWith(scratch, TERM) || StringHelper.StartsWith(scratch, FIELD) || StringHelper.StartsWith(scratch, END));
            if (!first && (liveDocs == null || liveDocs.Get(docID_Renamed)))
            {
                nextDocStart = lineStart;
                @in.Seek(posStart);
                return docID_Renamed;
            }
            return docID_Renamed = NO_MORE_DOCS;
        }
    }
}
private void SeekIndex(int docID) { indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L); }
/// <remarks>
/// TODO: we may want an alternate mode here which is
/// "if you are about to return NOT_FOUND I won't use
/// the terms data from that"; eg FuzzyTermsEnum will
/// (usually) just immediately call seek again if we
/// return NOT_FOUND so it's a waste for us to fill in
/// the term that was actually NOT_FOUND
/// </remarks>
public override SeekStatus SeekCeil(BytesRef target)
{
    if (_indexEnum == null)
    {
        throw new InvalidOperationException("terms index was not loaded");
    }

    var doSeek = true;

    // See if we can avoid seeking, because target term
    // is after current term but before next index term:
    if (_indexIsCurrent)
    {
        var cmp = BytesRef.UTF8SortedAsUnicodeComparer.Compare(_term, target);
        if (cmp == 0)
        {
            return SeekStatus.FOUND; // Already at the requested term
        }
        if (cmp < 0)
        {
            // Target term is after current term
            if (!_didIndexNext)
            {
                _nextIndexTerm = _indexEnum.Next == -1 ? null : _indexEnum.Term;
                _didIndexNext = true;
            }

            if (_nextIndexTerm == null || BytesRef.UTF8SortedAsUnicodeComparer.Compare(target, _nextIndexTerm) < 0)
            {
                // Optimization: requested term is within the
                // same term block we are now in; skip seeking
                // (but do scanning):
                doSeek = false;
            }
        }
    }

    if (doSeek)
    {
        //System.out.println("  seek");
        // Ask terms index to find biggest indexed term (=
        // first term in a block) that's <= our text:
        _input.Seek(_indexEnum.Seek(target).Value);
        var result = NextBlock();

        // Block must exist since, at least, the indexed term
        // is in the block:
        Debug.Assert(result);

        _indexIsCurrent = true;
        _didIndexNext = false;
        _blocksSinceSeek = 0;

        if (_doOrd)
        {
            _state.Ord = _indexEnum.Ord - 1;
        }

        _term.CopyBytes(_indexEnum.Term);
    }
    else
    {
        if (_state.TermBlockOrd == _blockTermCount && !NextBlock())
        {
            _indexIsCurrent = false;
            return SeekStatus.END;
        }
    }

    _seekPending = false;
    var common = 0;

    // Scan within block. We could do this by calling
    // _next() and testing the resulting term, but this
    // is wasteful. Instead, we first confirm the
    // target matches the common prefix of this block,
    // and then we scan the term bytes directly from the
    // termSuffixesReader's byte[], saving a copy into
    // the BytesRef term per term. Only when we return
    // do we then copy the bytes into the term.
    while (true)
    {
        // First, see if target term matches common prefix
        // in this block:
        if (common < _termBlockPrefix)
        {
            var cmp = (_term.Bytes[common] & 0xFF) - (target.Bytes[target.Offset + common] & 0xFF);
            if (cmp < 0)
            {
                // TODO: maybe we should store common prefix
                // in block header? (instead of relying on
                // last term of previous block)

                // Target's prefix is after the common block
                // prefix, so term cannot be in this block
                // but it could be in next block. We
                // must scan to end-of-block to set common
                // prefix for next block:
                if (_state.TermBlockOrd < _blockTermCount)
                {
                    while (_state.TermBlockOrd < _blockTermCount - 1)
                    {
                        _state.TermBlockOrd++;
                        _state.Ord++;
                        _termSuffixesReader.SkipBytes(_termSuffixesReader.ReadVInt());
                    }
                    var suffix = _termSuffixesReader.ReadVInt();
                    _term.Length = _termBlockPrefix + suffix;
                    if (_term.Bytes.Length < _term.Length)
                    {
                        _term.Grow(_term.Length);
                    }
                    _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                }
                _state.Ord++;

                if (!NextBlock())
                {
                    _indexIsCurrent = false;
                    return SeekStatus.END;
                }
                common = 0;
            }
            else if (cmp > 0)
            {
                // Target's prefix is before the common prefix
                // of this block, so we position to start of
                // block and return NOT_FOUND:
                Debug.Assert(_state.TermBlockOrd == 0);

                var suffix = _termSuffixesReader.ReadVInt();
                _term.Length = _termBlockPrefix + suffix;
                if (_term.Bytes.Length < _term.Length)
                {
                    _term.Grow(_term.Length);
                }
                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                return SeekStatus.NOT_FOUND;
            }
            else
            {
                common++;
            }
            continue;
        }

        // Test every term in this block
        while (true)
        {
            _state.TermBlockOrd++;
            _state.Ord++;

            var suffix = _termSuffixesReader.ReadVInt();

            // We know the prefix matches, so just compare the new suffix:
            var termLen = _termBlockPrefix + suffix;
            var bytePos = _termSuffixesReader.Position;

            var next = false;
            var limit = target.Offset + (termLen < target.Length ? termLen : target.Length);
            var targetPos = target.Offset + _termBlockPrefix;
            while (targetPos < limit)
            {
                var cmp = (_termSuffixes[bytePos++] & 0xFF) - (target.Bytes[targetPos++] & 0xFF);
                if (cmp < 0)
                {
                    // Current term is still before the target;
                    // keep scanning
                    next = true;
                    break;
                }
                if (cmp <= 0)
                {
                    continue;
                }
                // Done! Current term is after target. Stop
                // here, fill in real term, return NOT_FOUND.
                _term.Length = _termBlockPrefix + suffix;
                if (_term.Bytes.Length < _term.Length)
                {
                    _term.Grow(_term.Length);
                }
                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                return SeekStatus.NOT_FOUND;
            }

            if (!next && target.Length <= termLen)
            {
                _term.Length = _termBlockPrefix + suffix;
                if (_term.Bytes.Length < _term.Length)
                {
                    _term.Grow(_term.Length);
                }
                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                return target.Length == termLen ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
            }

            if (_state.TermBlockOrd == _blockTermCount)
            {
                // Must pre-fill term for next block's common prefix
                _term.Length = _termBlockPrefix + suffix;
                if (_term.Bytes.Length < _term.Length)
                {
                    _term.Grow(_term.Length);
                }
                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                break;
            }

            _termSuffixesReader.SkipBytes(suffix);
        }

        // The purpose of the terms dict index is to seek
        // the enum to the closest index term before the
        // term we are looking for. So, we should never
        // cross another index term (besides the first
        // one) while we are scanning:
        Debug.Assert(_indexIsCurrent);

        if (!NextBlock())
        {
            _indexIsCurrent = false;
            return SeekStatus.END;
        }
        common = 0;
    }
}
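The scan above compares bytes as unsigned values ((b & 0xFF)), which is exactly what makes the ordering agree with UTF8SortedAsUnicodeComparer. The core comparison in isolation:

using System;

// Compare two byte ranges as unsigned values, shorter-is-smaller on ties.
static int CompareTerms(byte[] a, int aLen, byte[] b, int bLen)
{
    int limit = Math.Min(aLen, bLen);
    for (int i = 0; i < limit; i++)
    {
        int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
        if (cmp != 0) return cmp;
    }
    return aLen - bLen;
}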
protected override void SeekDir(IndexInput input, long dirOffset)
{
    // Java's Long.SIZE / 8 is 8 bytes; in C# that is simply sizeof(long).
    // (sizeof(long) / 8 would seek back only 1 byte, so ReadLong would run past EOF.)
    input.Seek(input.Length() - sizeof(long));
    long offset = input.ReadLong();
    input.Seek(offset);
}
public SegmentTermsEnum(FieldReader fieldReader, BlockTermsReader blockTermsReader)
{
    _fieldReader = fieldReader;
    _blockTermsReader = blockTermsReader;

    _input = (IndexInput)_blockTermsReader._input.Clone();
    _input.Seek(_fieldReader._termsStartPointer);
    _indexEnum = _blockTermsReader._indexReader.GetFieldEnum(_fieldReader._fieldInfo);
    _doOrd = _blockTermsReader._indexReader.SupportsOrd;
    _fieldTerm.Field = _fieldReader._fieldInfo.Name;
    _state = _blockTermsReader._postingsReader.NewTermState();
    _state.TotalTermFreq = -1;
    _state.Ord = -1;

    _termSuffixes = new byte[128];
    _docFreqBytes = new byte[64];
    _longs = new long[_fieldReader._longsSize];
}
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;

    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingTermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader compressingTermVectorsReader)
            {
                matchingVectorsReader = compressingTermVectorsReader;
            }
        }

        int maxDoc = reader.MaxDoc;
        IBits liveDocs = reader.LiveDocs;

        if (matchingVectorsReader == null
            || matchingVectorsReader.Version != VERSION_CURRENT
            || matchingVectorsReader.CompressionMode != compressionMode
            || matchingVectorsReader.ChunkSize != chunkSize
            || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT)
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Fields vectors = reader.GetTermVectors(i);
                AddAllDocVectors(vectors, mergeState);
                ++docCount;
                mergeState.CheckAbort.Work(300);
            }
        }
        else
        {
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
            IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream;
            vectorsStreamOrig.Seek(0);
            ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
            {
                // We make sure to move the checksum input in any case, otherwise the final
                // integrity check might need to read the whole file a second time
                long startPointer = index.GetStartPointer(i);
                if (startPointer > vectorsStream.GetFilePointer())
                {
                    vectorsStream.Seek(startPointer);
                }
                if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                {
                    int docBase = vectorsStream.ReadVInt32();
                    int chunkDocs = vectorsStream.ReadVInt32();
                    if (Debugging.AssertsEnabled) Debugging.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                    if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                    {
                        long chunkEnd = index.GetStartPointer(docBase + chunkDocs);
                        long chunkLength = chunkEnd - vectorsStream.GetFilePointer();
                        indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer());
                        this.vectorsStream.WriteVInt32(docCount);
                        this.vectorsStream.WriteVInt32(chunkDocs);
                        this.vectorsStream.CopyBytes(vectorsStream, chunkLength);
                        docCount += chunkDocs;
                        this.numDocs += chunkDocs;
                        mergeState.CheckAbort.Work(300 * chunkDocs);
                        i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                    }
                    else
                    {
                        for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                        }
                    }
                }
                else
                {
                    Fields vectors = reader.GetTermVectors(i);
                    AddAllDocVectors(vectors, mergeState);
                    ++docCount;
                    mergeState.CheckAbort.Work(300);
                    i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                }
            }

            vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength());
            CodecUtil.CheckFooter(vectorsStream);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
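The bulk-copy fast path raw-copies a compressed chunk only when every document in it is live, so no doc IDs need remapping; otherwise it falls back to re-adding vectors per document. The gate condition, reduced to its essentials (the real check also requires chunk alignment and an empty pending buffer):

// A chunk [docBase, docBase + chunkDocs) can be copied verbatim only if
// every document in it is live; one deletion forces the slow per-doc path.
static bool CanBulkCopyChunk(bool[] liveDocs, int docBase, int chunkDocs)
{
    for (int d = docBase; d < docBase + chunkDocs; d++)
    {
        if (liveDocs != null && !liveDocs[d]) return false; // deleted doc -> slow path
    }
    return true;
}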
// LUCENENET NOTE: Changed from public to internal because the class had to be made public, but is not for public use.
internal SimpleTextDocValuesReader(SegmentReadState state, string ext)
{
    data = state.Directory.OpenInput(
        IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, ext), state.Context);
    maxDoc = state.SegmentInfo.DocCount;
    while (true)
    {
        ReadLine();
        if (scratch.Equals(SimpleTextDocValuesWriter.END))
        {
            break;
        }
        Debug.Assert(StartsWith(SimpleTextDocValuesWriter.FIELD), scratch.Utf8ToString());
        var fieldName = StripPrefix(SimpleTextDocValuesWriter.FIELD);
        var field = new OneField();
        fields[fieldName] = field;

        ReadLine();
        Debug.Assert(StartsWith(SimpleTextDocValuesWriter.TYPE), scratch.Utf8ToString());

        var dvType = (DocValuesType)Enum.Parse(typeof(DocValuesType), StripPrefix(SimpleTextDocValuesWriter.TYPE));

        if (dvType == DocValuesType.NUMERIC)
        {
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.MINVALUE), "got " + scratch.Utf8ToString() + " field=" + fieldName + " ext=" + ext);
            field.MinValue = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.MINVALUE), CultureInfo.InvariantCulture);
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            field.DataStartFilePointer = data.GetFilePointer();
            data.Seek(data.GetFilePointer() + (1 + field.Pattern.Length + 2) * maxDoc);
        }
        else if (dvType == DocValuesType.BINARY)
        {
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
            field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            field.DataStartFilePointer = data.GetFilePointer();
            data.Seek(data.GetFilePointer() + (9 + field.Pattern.Length + field.MaxLength + 2) * maxDoc);
        }
        else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET)
        {
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.NUMVALUES));
            field.NumValues = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.NUMVALUES), CultureInfo.InvariantCulture);
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
            field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            ReadLine();
            Debug.Assert(StartsWith(SimpleTextDocValuesWriter.ORDPATTERN));
            field.OrdPattern = StripPrefix(SimpleTextDocValuesWriter.ORDPATTERN);
            field.DataStartFilePointer = data.GetFilePointer();
            data.Seek(data.GetFilePointer() + (9 + field.Pattern.Length + field.MaxLength) * field.NumValues + (1 + field.OrdPattern.Length) * maxDoc);
        }
        else
        {
            throw new ArgumentOutOfRangeException();
        }
    }

    // We should only be called from above if at least one
    // field has DVs:
    Debug.Assert(fields.Count > 0);
}
public override void Seek(long pos) { cacheInput.Seek(pos); }
internal virtual TermsEnum GetTermsEnum(IndexInput input)
{
    input.Seek(bytes.offset);
    return new TermsEnumAnonymousClass(this, input);
}
// Not private to avoid synthetic access$NNN methods internal virtual void SeekTvx(int docNum) { tvx.Seek(docNum * 16L + HEADER_LENGTH_INDEX); }
private void SeekIndex(int docID) { indexStream.Seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); }
public override long Get(int index)
{
    long majorBitPos = (long)index * m_bitsPerValue;
    long elementPos = majorBitPos.TripleShift(3);
    try
    {
        @in.Seek(startPointer + elementPos);

        int bitPos = (int)(majorBitPos & 7);
        // round up bits to a multiple of 8 to find total bytes needed to read
        int roundedBits = ((bitPos + m_bitsPerValue + 7) & ~7);
        // the number of extra bits read at the end to shift out
        int shiftRightBits = roundedBits - bitPos - m_bitsPerValue;

        long rawValue;
        switch (roundedBits.TripleShift(3))
        {
            case 1:
                rawValue = @in.ReadByte();
                break;
            case 2:
                rawValue = @in.ReadInt16();
                break;
            case 3:
                rawValue = ((long)@in.ReadInt16() << 8) | (@in.ReadByte() & 0xFFL);
                break;
            case 4:
                rawValue = @in.ReadInt32();
                break;
            case 5:
                rawValue = ((long)@in.ReadInt32() << 8) | (@in.ReadByte() & 0xFFL);
                break;
            case 6:
                rawValue = ((long)@in.ReadInt32() << 16) | (@in.ReadInt16() & 0xFFFFL);
                break;
            case 7:
                rawValue = ((long)@in.ReadInt32() << 24) | ((@in.ReadInt16() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                break;
            case 8:
                rawValue = @in.ReadInt64();
                break;
            case 9:
                // We must be very careful not to shift out relevant bits. So we account for right shift
                // we would normally do on return here, and reset it.
                rawValue = (@in.ReadInt64() << (8 - shiftRightBits)) | (((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits));
                shiftRightBits = 0;
                break;
            default:
                throw new InvalidOperationException("bitsPerValue too large: " + m_bitsPerValue);
        }
        return (rawValue.TripleShift(shiftRightBits)) & valueMask;
    }
    catch (IOException ioe)
    {
        throw new InvalidOperationException("failed", ioe);
    }
}
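Get translates a value index into a byte-aligned read: majorBitPos = index * bitsPerValue, elementPos = majorBitPos >>> 3, then the bit span is rounded up to whole bytes and the surplus shifted back out. The same unpacking against a plain byte array (big-endian, as Lucene's DataInput reads):

// Read value `index` from big-endian packed storage of `bitsPerValue`-bit values.
// This sketch handles 1..57 bits per value, so the span fits one 64-bit read.
static long GetPacked(byte[] data, int index, int bitsPerValue)
{
    long majorBitPos = (long)index * bitsPerValue;
    int bytePos = (int)(majorBitPos >> 3);              // first byte holding the value
    int bitPos = (int)(majorBitPos & 7);                // bit offset inside that byte
    int roundedBits = (bitPos + bitsPerValue + 7) & ~7; // round up to whole bytes
    int shiftRight = roundedBits - bitPos - bitsPerValue; // extra bits read at the end

    ulong raw = 0;
    for (int i = 0; i < roundedBits >> 3; i++)
    {
        raw = (raw << 8) | data[bytePos + i]; // big-endian accumulate
    }
    ulong mask = (1UL << bitsPerValue) - 1;
    return (long)((raw >> shiftRight) & mask);
}

// Two 12-bit values packed big-endian: 0xABC and 0x123 -> bytes AB C1 23.
byte[] packed = { 0xAB, 0xC1, 0x23 };
long v0 = GetPacked(packed, 0, 12); // 0xABC
long v1 = GetPacked(packed, 1, 12); // 0x123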
public override int NextPosition()
{
    if (lazyProxPointer != -1)
    {
        proxIn.Seek(lazyProxPointer);
        lazyProxPointer = -1;
    }

    if (payloadPending && payloadLength > 0)
    {
        // payload of last position was never retrieved -- skip it
        proxIn.Seek(proxIn.GetFilePointer() + payloadLength);
        payloadPending = false;
    }

    // scan over any docs that were iterated without their positions
    while (posPendingCount > freq)
    {
        int code = proxIn.ReadVInt32();
        if (storePayloads)
        {
            if ((code & 1) != 0)
            {
                // new payload length
                payloadLength = proxIn.ReadVInt32();
                Debug.Assert(payloadLength >= 0);
            }
            Debug.Assert(payloadLength != -1);
        }

        if (storeOffsets)
        {
            if ((proxIn.ReadVInt32() & 1) != 0)
            {
                // new offset length
                offsetLength = proxIn.ReadVInt32();
            }
        }

        if (storePayloads)
        {
            proxIn.Seek(proxIn.GetFilePointer() + payloadLength);
        }

        posPendingCount--;
        position = 0;
        startOffset = 0;
        payloadPending = false;
        //System.out.println("StandardR.D&PE skipPos");
    }

    // read next position
    if (payloadPending && payloadLength > 0)
    {
        // payload wasn't retrieved for last position
        proxIn.Seek(proxIn.GetFilePointer() + payloadLength);
    }

    int code_ = proxIn.ReadVInt32();
    if (storePayloads)
    {
        if ((code_ & 1) != 0)
        {
            // new payload length
            payloadLength = proxIn.ReadVInt32();
            Debug.Assert(payloadLength >= 0);
        }
        Debug.Assert(payloadLength != -1);
        payloadPending = true;
        code_ = (int)((uint)code_ >> 1);
    }
    position += code_;

    if (storeOffsets)
    {
        int offsetCode = proxIn.ReadVInt32();
        if ((offsetCode & 1) != 0)
        {
            // new offset length
            offsetLength = proxIn.ReadVInt32();
        }
        startOffset += (int)((uint)offsetCode >> 1);
    }

    posPendingCount--;
    Debug.Assert(posPendingCount >= 0, "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount);
    //System.out.println("StandardR.D&PE nextPos return pos=" + position);
    return position;
}
public override void VisitDocument(int docID, StoredFieldVisitor visitor)
{
    fieldsStream.Seek(indexReader.GetStartPointer(docID));

    int docBase = fieldsStream.ReadVInt32();
    int chunkDocs = fieldsStream.ReadVInt32();
    if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
    {
        throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
    }

    int numStoredFields, offset, length, totalLength;
    if (chunkDocs == 1)
    {
        numStoredFields = fieldsStream.ReadVInt32();
        offset = 0;
        length = fieldsStream.ReadVInt32();
        totalLength = length;
    }
    else
    {
        int bitsPerStoredFields = fieldsStream.ReadVInt32();
        if (bitsPerStoredFields == 0)
        {
            numStoredFields = fieldsStream.ReadVInt32();
        }
        else if (bitsPerStoredFields > 31)
        {
            throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
        }
        else
        {
            long filePointer = fieldsStream.GetFilePointer();
            PackedInt32s.Reader reader = PackedInt32s.GetDirectReaderNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
            numStoredFields = (int)(reader.Get(docID - docBase));
            fieldsStream.Seek(filePointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
        }

        int bitsPerLength = fieldsStream.ReadVInt32();
        if (bitsPerLength == 0)
        {
            length = fieldsStream.ReadVInt32();
            offset = (docID - docBase) * length;
            totalLength = chunkDocs * length;
        }
        else if (bitsPerLength > 31)
        {
            throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
        }
        else
        {
            PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
            int off = 0;
            for (int i = 0; i < docID - docBase; ++i)
            {
                off += (int)it.Next();
            }
            offset = off;
            length = (int)it.Next();
            off += length;
            for (int i = docID - docBase + 1; i < chunkDocs; ++i)
            {
                off += (int)it.Next();
            }
            totalLength = off;
        }
    }

    if ((length == 0) != (numStoredFields == 0))
    {
        throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")");
    }
    if (numStoredFields == 0)
    {
        // nothing to do
        return;
    }

    DataInput documentInput;
    if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(chunkSize > 0);
            Debugging.Assert(offset < chunkSize);
        }
        decompressor.Decompress(fieldsStream, chunkSize, offset, Math.Min(length, chunkSize - offset), bytes);
        documentInput = new DataInputAnonymousClass(this, length);
    }
    else
    {
        BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
        decompressor.Decompress(fieldsStream, totalLength, offset, length, bytes);
        if (Debugging.AssertsEnabled) Debugging.Assert(bytes.Length == length);
        documentInput = new ByteArrayDataInput(bytes.Bytes, bytes.Offset, bytes.Length);
    }

    for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++)
    {
        long infoAndBits = documentInput.ReadVInt64();
        int fieldNumber = (int)infoAndBits.TripleShift(CompressingStoredFieldsWriter.TYPE_BITS);
        FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

        int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK);
        if (Debugging.AssertsEnabled) Debugging.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits={0:x}", bits);

        switch (visitor.NeedsField(fieldInfo))
        {
            case StoredFieldVisitor.Status.YES:
                ReadField(documentInput, visitor, fieldInfo, bits);
                break;
            case StoredFieldVisitor.Status.NO:
                SkipField(documentInput, bits);
                break;
            case StoredFieldVisitor.Status.STOP:
                return;
        }
    }
}
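With several documents per chunk, a document's offset inside the decompressed block is just the sum of the per-document lengths before it, which is what the iterator loop above accumulates. Stand-alone:

// Given per-document compressed lengths inside a chunk, locate doc `k`.
static (int offset, int length, int totalLength) Locate(int[] lengths, int k)
{
    int off = 0;
    for (int i = 0; i < k; i++) off += lengths[i]; // bytes before doc k
    int len = lengths[k];
    int total = off + len;
    for (int i = k + 1; i < lengths.Length; i++) total += lengths[i];
    return (off, len, total);
}

// lengths {10, 25, 7}: doc 1 starts at 10, is 25 bytes, the chunk holds 42 bytes.
var (offset, length, totalLength) = Locate(new[] { 10, 25, 7 }, 1);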
/// <summary>
/// Returns (but does not validate) the checksum previously written by <see cref="CheckFooter(ChecksumIndexInput)"/>. </summary>
/// <returns> actual checksum value </returns>
/// <exception cref="IOException"> If the footer is invalid. </exception>
public static long RetrieveChecksum(IndexInput @in)
{
    @in.Seek(@in.Length - FooterLength());
    ValidateFooter(@in);
    return @in.ReadInt64();
}
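CodecUtil's footer is a fixed 16 bytes (4-byte magic, 4-byte algorithm ID, 8-byte checksum), so seeking to Length - FooterLength() lands exactly on it. A rough stand-alone analog; BinaryWriter is little-endian rather than Lucene's big-endian DataOutput, and the magic constant here is illustrative:

using System.IO;

const int FooterLength = 16; // magic (4) + algorithm id (4) + checksum (8)

var ms = new MemoryStream();
using (var w = new BinaryWriter(ms, System.Text.Encoding.UTF8, leaveOpen: true))
{
    w.Write(new byte[100]);       // file body
    w.Write(0x2FD48255);          // footer magic (illustrative constant)
    w.Write(0);                   // algorithm id
    w.Write(0x0123456789ABCDEFL); // checksum
}

ms.Seek(ms.Length - FooterLength, SeekOrigin.Begin); // jump straight to the footer
using var r = new BinaryReader(ms);
int magic = r.ReadInt32();
int algorithm = r.ReadInt32();
long checksum = r.ReadInt64();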
// LUCENENET NOTE: Changed from public to internal because the class had to be made public, but is not for public use.
internal SimpleTextDocValuesReader(SegmentReadState state, string ext)
{
    data = state.Directory.OpenInput(
        IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, ext), state.Context);
    maxDoc = state.SegmentInfo.DocCount;
    while (true)
    {
        ReadLine();
        if (scratch.Equals(SimpleTextDocValuesWriter.END))
        {
            break;
        }
        // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
        if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.FIELD), "{0}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8));
        var fieldName = StripPrefix(SimpleTextDocValuesWriter.FIELD);
        var field = new OneField();
        fields[fieldName] = field;

        ReadLine();
        // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
        if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.TYPE), "{0}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8));

        var dvType = (DocValuesType)Enum.Parse(typeof(DocValuesType), StripPrefix(SimpleTextDocValuesWriter.TYPE));
        // if (Debugging.AssertsEnabled) Debugging.Assert(dvType != null); // LUCENENET: Not possible for an enum to be null in .NET
        if (dvType == DocValuesType.NUMERIC)
        {
            ReadLine();
            // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MINVALUE), "got {0} field={1} ext={2}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8), fieldName, ext);
            field.MinValue = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.MINVALUE), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            field.DataStartFilePointer = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            data.Seek(data.Position + (1 + field.Pattern.Length + 2) * maxDoc); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }
        else if (dvType == DocValuesType.BINARY)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
            field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            field.DataStartFilePointer = data.Position;
            data.Seek(data.Position + (9 + field.Pattern.Length + field.MaxLength + 2) * maxDoc);
        }
        else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.NUMVALUES));
            field.NumValues = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.NUMVALUES), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
            field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.ORDPATTERN));
            field.OrdPattern = StripPrefix(SimpleTextDocValuesWriter.ORDPATTERN);
            field.DataStartFilePointer = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            data.Seek(data.Position + (9 + field.Pattern.Length + field.MaxLength) * field.NumValues + (1 + field.OrdPattern.Length) * maxDoc); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }
        else
        {
            throw AssertionError.Create();
        }
    }

    // We should only be called from above if at least one
    // field has DVs:
    if (Debugging.AssertsEnabled) Debugging.Assert(fields.Count > 0);
}
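Because every SimpleText doc-values row has a fixed byte width derived from its pattern, the constructor can compute the size of a field's whole data block and Seek past it without reading any rows. The NUMERIC case in isolation, matching the (1 + pattern.Length + 2) * maxDoc expression above:

// Each NUMERIC row occupies a fixed width: one sign char, pattern.Length
// formatted digits, plus 2 trailing bytes per the writer's fixed layout.
static long NumericBlockSize(string pattern, int maxDoc)
    => (1L + pattern.Length + 2) * maxDoc;

// A field with pattern "0000000000" over 1M docs occupies 13,000,000 bytes,
// so the next field header starts at dataStart + 13,000,000.
long skip = NumericBlockSize("0000000000", 1_000_000);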
public override Fields Get(int doc)
{
    EnsureOpen();

    // seek to the right place
    {
        long startPointer = indexReader.GetStartPointer(doc);
        vectorsStream.Seek(startPointer);
    }

    // decode
    // - docBase: first doc ID of the chunk
    // - chunkDocs: number of docs of the chunk
    int docBase = vectorsStream.ReadVInt32();
    int chunkDocs = vectorsStream.ReadVInt32();
    if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
    {
        throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
    }

    int skip;        // number of fields to skip
    int numFields;   // number of fields of the document we're looking for
    int totalFields; // total number of fields of the chunk (sum for all docs)
    if (chunkDocs == 1)
    {
        skip = 0;
        numFields = totalFields = vectorsStream.ReadVInt32();
    }
    else
    {
        reader.Reset(vectorsStream, chunkDocs);
        int sum = 0;
        for (int i = docBase; i < doc; ++i)
        {
            sum += (int)reader.Next();
        }
        skip = sum;
        numFields = (int)reader.Next();
        sum += numFields;
        for (int i = doc + 1; i < docBase + chunkDocs; ++i)
        {
            sum += (int)reader.Next();
        }
        totalFields = sum;
    }

    if (numFields == 0)
    {
        // no vectors
        return null;
    }

    // read field numbers that have term vectors
    int[] fieldNums;
    {
        int token = vectorsStream.ReadByte() & 0xFF;
        Debug.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0
        int bitsPerFieldNum = token & 0x1F;
        int totalDistinctFields = (int)((uint)token >> 5);
        if (totalDistinctFields == 0x07)
        {
            totalDistinctFields += vectorsStream.ReadVInt32();
        }
        ++totalDistinctFields;
        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
        fieldNums = new int[totalDistinctFields];
        for (int i = 0; i < totalDistinctFields; ++i)
        {
            fieldNums[i] = (int)it.Next();
        }
    }

    // read field numbers and flags
    int[] fieldNumOffs = new int[numFields];
    PackedInt32s.Reader flags;
    {
        int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
        PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
        switch (vectorsStream.ReadVInt32())
        {
            case 0:
                PackedInt32s.Reader fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
                PackedInt32s.Mutable f = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
                for (int i = 0; i < totalFields; ++i)
                {
                    int fieldNumOff = (int)allFieldNumOffs.Get(i);
                    Debug.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
                    int fgs = (int)fieldFlags.Get(fieldNumOff);
                    f.Set(i, fgs);
                }
                flags = f;
                break;
            case 1:
                flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
                break;
            default:
                throw new Exception();
        }
        for (int i = 0; i < numFields; ++i)
        {
            fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
        }
    }

    // number of terms per field for all fields
    PackedInt32s.Reader numTerms;
    int totalTerms;
    {
        int bitsRequired = vectorsStream.ReadVInt32();
        numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
        int sum = 0;
        for (int i = 0; i < totalFields; ++i)
        {
            sum += (int)numTerms.Get(i);
        }
        totalTerms = sum;
    }

    // term lengths
    int docOff = 0, docLen = 0, totalLen;
    int[] fieldLengths = new int[numFields];
    int[][] prefixLengths = new int[numFields][];
    int[][] suffixLengths = new int[numFields][];
    {
        reader.Reset(vectorsStream, totalTerms);
        // skip
        int toSkip = 0;
        for (int i = 0; i < skip; ++i)
        {
            toSkip += (int)numTerms.Get(i);
        }
        reader.Skip(toSkip);
        // read prefix lengths
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            int[] fieldPrefixLengths = new int[termCount];
            prefixLengths[i] = fieldPrefixLengths;
            for (int j = 0; j < termCount;)
            {
                Int64sRef next = reader.Next(termCount - j);
                for (int k = 0; k < next.Length; ++k)
                {
                    fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
                }
            }
        }
        reader.Skip(totalTerms - reader.Ord);

        reader.Reset(vectorsStream, totalTerms);
        // skip
        toSkip = 0;
        for (int i = 0; i < skip; ++i)
        {
            for (int j = 0; j < numTerms.Get(i); ++j)
            {
                docOff += (int)reader.Next();
            }
        }
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            int[] fieldSuffixLengths = new int[termCount];
            suffixLengths[i] = fieldSuffixLengths;
            for (int j = 0; j < termCount;)
            {
                Int64sRef next = reader.Next(termCount - j);
                for (int k = 0; k < next.Length; ++k)
                {
                    fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
                }
            }
            fieldLengths[i] = Sum(suffixLengths[i]);
            docLen += fieldLengths[i];
        }
        totalLen = docOff + docLen;
        for (int i = skip + numFields; i < totalFields; ++i)
        {
            for (int j = 0; j < numTerms.Get(i); ++j)
            {
                totalLen += (int)reader.Next();
            }
        }
    }

    // term freqs
    int[] termFreqs = new int[totalTerms];
    {
        reader.Reset(vectorsStream, totalTerms);
        for (int i = 0; i < totalTerms;)
        {
            Int64sRef next = reader.Next(totalTerms - i);
            for (int k = 0; k < next.Length; ++k)
            {
                termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
            }
        }
    }

    // total number of positions, offsets and payloads
    int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
    for (int i = 0, termIndex = 0; i < totalFields; ++i)
    {
        int f = (int)flags.Get(i);
        int termCount = (int)numTerms.Get(i);
        for (int j = 0; j < termCount; ++j)
        {
            int freq = termFreqs[termIndex++];
            if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
            {
                totalPositions += freq;
            }
            if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
            {
                totalOffsets += freq;
            }
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                totalPayloads += freq;
            }
        }
        Debug.Assert(i != totalFields - 1 || termIndex == totalTerms, termIndex + " " + totalTerms);
    }

    int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
    int[][] positions, startOffsets, lengths;
    if (totalPositions > 0)
    {
        positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
    }
    else
    {
        positions = new int[numFields][];
    }

    if (totalOffsets > 0)
    {
        // average number of chars per term
        float[] charsPerTerm = new float[fieldNums.Length];
        for (int i = 0; i < charsPerTerm.Length; ++i)
        {
            charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
        }
        startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
        lengths = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);

        for (int i = 0; i < numFields; ++i)
        {
            int[] fStartOffsets = startOffsets[i];
            int[] fPositions = positions[i];
            // patch offsets from positions
            if (fStartOffsets != null && fPositions != null)
            {
                float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                for (int j = 0; j < startOffsets[i].Length; ++j)
                {
                    fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
                }
            }
            if (fStartOffsets != null)
            {
                int[] fPrefixLengths = prefixLengths[i];
                int[] fSuffixLengths = suffixLengths[i];
                int[] fLengths = lengths[i];
                for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                {
                    // delta-decode start offsets and patch lengths using term lengths
                    int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                    lengths[i][positionIndex[i][j]] += termLength;
                    for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
                    {
                        fStartOffsets[k] += fStartOffsets[k - 1];
                        fLengths[k] += termLength;
                    }
                }
            }
        }
    }
    else
    {
        startOffsets = lengths = new int[numFields][];
    }

    if (totalPositions > 0)
    {
        // delta-decode positions
        for (int i = 0; i < numFields; ++i)
        {
            int[] fPositions = positions[i];
            int[] fpositionIndex = positionIndex[i];
            if (fPositions != null)
            {
                for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                {
                    // delta-decode start offsets
                    for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k)
                    {
                        fPositions[k] += fPositions[k - 1];
                    }
                }
            }
        }
    }

    // payload lengths
    int[][] payloadIndex = new int[numFields][];
    int totalPayloadLength = 0;
    int payloadOff = 0;
    int payloadLen = 0;
    if (totalPayloads > 0)
    {
        reader.Reset(vectorsStream, totalPayloads);
        // skip
        int termIndex = 0;
        for (int i = 0; i < skip; ++i)
        {
            int f = (int)flags.Get(i);
            int termCount = (int)numTerms.Get(i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        int l = (int)reader.Next();
                        payloadOff += l;
                    }
                }
            }
            termIndex += termCount;
        }
        totalPayloadLength = payloadOff;
        // read doc payload lengths
        for (int i = 0; i < numFields; ++i)
        {
            int f = (int)flags.Get(skip + i);
            int termCount = (int)numTerms.Get(skip + i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                int totalFreq = positionIndex[i][termCount];
                payloadIndex[i] = new int[totalFreq + 1];
                int posIdx = 0;
                payloadIndex[i][posIdx] = payloadLen;
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        int payloadLength = (int)reader.Next();
                        payloadLen += payloadLength;
                        payloadIndex[i][posIdx + 1] = payloadLen;
                        ++posIdx;
                    }
                }
                Debug.Assert(posIdx == totalFreq);
            }
            termIndex += termCount;
        }
        totalPayloadLength += payloadLen;
        for (int i = skip + numFields; i < totalFields; ++i)
        {
            int f = (int)flags.Get(i);
            int termCount = (int)numTerms.Get(i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        totalPayloadLength += (int)reader.Next();
                    }
                }
            }
            termIndex += termCount;
        }
        Debug.Assert(termIndex == totalTerms, termIndex + " " + totalTerms);
    }

    // decompress data
    BytesRef suffixBytes = new BytesRef();
    decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
    suffixBytes.Length = docLen;
    BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);

    int[] FieldFlags = new int[numFields];
    for (int i = 0; i < numFields; ++i)
    {
        FieldFlags[i] = (int)flags.Get(skip + i);
    }

    int[] fieldNumTerms = new int[numFields];
    for (int i = 0; i < numFields; ++i)
    {
        fieldNumTerms[i] = (int)numTerms.Get(skip + i);
    }

    int[][] fieldTermFreqs = new int[numFields][];
    {
        int termIdx = 0;
        for (int i = 0; i < skip; ++i)
        {
            termIdx += (int)numTerms.Get(i);
        }
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            fieldTermFreqs[i] = new int[termCount];
            for (int j = 0; j < termCount; ++j)
            {
                fieldTermFreqs[i][j] = termFreqs[termIdx++];
            }
        }
    }

    Debug.Assert(Sum(fieldLengths) == docLen, Sum(fieldLengths) + " != " + docLen);

    return new TVFields(this, fieldNums, FieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes);
}
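Positions and start offsets are stored as per-term deltas and rebuilt with a running sum (fPositions[k] += fPositions[k - 1] above). The decode step on its own:

// Turn per-occurrence deltas back into absolute positions, in place.
static void DeltaDecode(int[] values, int start, int endExclusive)
{
    for (int k = start + 1; k < endExclusive; k++)
    {
        values[k] += values[k - 1];
    }
}

int[] positions = { 4, 3, 10 }; // deltas: 4, +3, +10
DeltaDecode(positions, 0, positions.Length);
// positions is now { 4, 7, 17 }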
public override Fields Get(int doc)
{
    // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
    var fields = new JCG.SortedDictionary<string, SimpleTVTerms>(StringComparer.Ordinal);

    _input.Seek(_offsets[doc]);
    ReadLine();
    if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.NUMFIELDS));
    var numFields = ParseInt32At(SimpleTextTermVectorsWriter.NUMFIELDS.Length);
    if (numFields == 0)
    {
        return null; // no vectors for this doc
    }

    for (var i = 0; i < numFields; i++)
    {
        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELD));
        // skip fieldNumber:
        ParseInt32At(SimpleTextTermVectorsWriter.FIELD.Length);

        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDNAME));
        var fieldName = ReadString(SimpleTextTermVectorsWriter.FIELDNAME.Length, _scratch);

        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPOSITIONS));
        var positions = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPOSITIONS.Length, _scratch), CultureInfo.InvariantCulture);

        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDOFFSETS));
        var offsets = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDOFFSETS.Length, _scratch), CultureInfo.InvariantCulture);

        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPAYLOADS));
        var payloads = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPAYLOADS.Length, _scratch), CultureInfo.InvariantCulture);

        ReadLine();
        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDTERMCOUNT));
        var termCount = ParseInt32At(SimpleTextTermVectorsWriter.FIELDTERMCOUNT.Length);

        var terms = new SimpleTVTerms(offsets, positions, payloads);
        fields.Add(fieldName, terms);

        for (var j = 0; j < termCount; j++)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMTEXT));
            var term = new BytesRef();
            var termLength = _scratch.Length - SimpleTextTermVectorsWriter.TERMTEXT.Length;
            term.Grow(termLength);
            term.Length = termLength;
            Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.TERMTEXT.Length, term.Bytes, term.Offset, termLength);

            var postings = new SimpleTVPostings();
            terms.terms.Add(term, postings);

            ReadLine();
            if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMFREQ));
            postings.freq = ParseInt32At(SimpleTextTermVectorsWriter.TERMFREQ.Length);

            if (!positions && !offsets)
            {
                continue;
            }

            if (positions)
            {
                postings.positions = new int[postings.freq];
                if (payloads)
                {
                    postings.payloads = new BytesRef[postings.freq];
                }
            }

            if (offsets)
            {
                postings.startOffsets = new int[postings.freq];
                postings.endOffsets = new int[postings.freq];
            }

            for (var k = 0; k < postings.freq; k++)
            {
                if (positions)
                {
                    ReadLine();
                    if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.POSITION));
                    postings.positions[k] = ParseInt32At(SimpleTextTermVectorsWriter.POSITION.Length);
                    if (payloads)
                    {
                        ReadLine();
                        if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.PAYLOAD));
                        if (_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length == 0)
                        {
                            postings.payloads[k] = null;
                        }
                        else
                        {
                            var payloadBytes = new byte[_scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length];
                            Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.PAYLOAD.Length, payloadBytes, 0, payloadBytes.Length);
                            postings.payloads[k] = new BytesRef(payloadBytes);
                        }
                    }
                }

                if (!offsets)
                {
                    continue;
                }

                ReadLine();
                if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.STARTOFFSET));
                postings.startOffsets[k] = ParseInt32At(SimpleTextTermVectorsWriter.STARTOFFSET.Length);

                ReadLine();
                if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.ENDOFFSET));
                postings.endOffsets[k] = ParseInt32At(SimpleTextTermVectorsWriter.ENDOFFSET.Length);
            }
        }
    }
    return new SimpleTVFields(this, fields);
}
public override void Seek(long pos) { _indexInput?.Seek(pos); }
/// <summary>
/// Seek {@code input} to the directory offset. </summary>
protected internal virtual void SeekDir(IndexInput input, long dirOffset)
{
    if (Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (Version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
private void SeekIndex(int docID) { IndexStream.Seek(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX + docID * 8L); }
public virtual void TestRandomAccessClones()
{
    SetUp_2();
    CompoundFileDirectory cr = new CompoundFileDirectory(dir, "f.comp", NewIOContext(Random), false);

    // Open two files
    IndexInput e1 = cr.OpenInput("f11", NewIOContext(Random));
    IndexInput e2 = cr.OpenInput("f3", NewIOContext(Random));

    IndexInput a1 = (IndexInput)e1.Clone();
    IndexInput a2 = (IndexInput)e2.Clone();

    // Seek the first pair
    e1.Seek(100);
    a1.Seek(100);
    Assert.AreEqual(100, e1.GetFilePointer());
    Assert.AreEqual(100, a1.GetFilePointer());
    byte be1 = e1.ReadByte();
    byte ba1 = a1.ReadByte();
    Assert.AreEqual(be1, ba1);

    // Now seek the second pair
    e2.Seek(1027);
    a2.Seek(1027);
    Assert.AreEqual(1027, e2.GetFilePointer());
    Assert.AreEqual(1027, a2.GetFilePointer());
    byte be2 = e2.ReadByte();
    byte ba2 = a2.ReadByte();
    Assert.AreEqual(be2, ba2);

    // Now make sure the first one didn't move
    Assert.AreEqual(101, e1.GetFilePointer());
    Assert.AreEqual(101, a1.GetFilePointer());
    be1 = e1.ReadByte();
    ba1 = a1.ReadByte();
    Assert.AreEqual(be1, ba1);

    // Now move the first one again, past the buffer length
    e1.Seek(1910);
    a1.Seek(1910);
    Assert.AreEqual(1910, e1.GetFilePointer());
    Assert.AreEqual(1910, a1.GetFilePointer());
    be1 = e1.ReadByte();
    ba1 = a1.ReadByte();
    Assert.AreEqual(be1, ba1);

    // Now make sure the second set didn't move
    Assert.AreEqual(1028, e2.GetFilePointer());
    Assert.AreEqual(1028, a2.GetFilePointer());
    be2 = e2.ReadByte();
    ba2 = a2.ReadByte();
    Assert.AreEqual(be2, ba2);

    // Move the second set back, again crossing the buffer size
    e2.Seek(17);
    a2.Seek(17);
    Assert.AreEqual(17, e2.GetFilePointer());
    Assert.AreEqual(17, a2.GetFilePointer());
    be2 = e2.ReadByte();
    ba2 = a2.ReadByte();
    Assert.AreEqual(be2, ba2);

    // Finally, make sure the first set didn't move
    Assert.AreEqual(1911, e1.GetFilePointer());
    Assert.AreEqual(1911, a1.GetFilePointer());
    be1 = e1.ReadByte();
    ba1 = a1.ReadByte();
    Assert.AreEqual(be1, ba1);

    e1.Dispose();
    e2.Dispose();
    a1.Dispose();
    a2.Dispose();
    cr.Dispose();
}
private void SeekDir(IndexInput input, long dirOffset)
{
    if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
internal virtual TermsEnum GetTermsEnum(IndexInput input)
{
    input.Seek(Bytes.Offset);
    return new TermsEnumAnonymousInnerClassHelper(this, input);
}