// Sets up a terms enum over one field: clones per-term state storage and
// positions the stats/metadata block readers at the start of their blocks.
internal BaseTermsEnum(TermsReader outerInstance)
{
    this.outerInstance = outerInstance;
    this.state = outerInstance.outerInstance.postingsReader.NewTermState();
    this.term = null;

    // Rewind the three block readers onto this field's serialized blocks.
    this.statsReader.Reset(outerInstance.statsBlock);
    this.metaLongsReader.Reset(outerInstance.metaLongsBlock);
    this.metaBytesReader.Reset(outerInstance.metaBytesBlock);

    // Per-term decode buffers, one slot per term within an INTERVAL-sized block.
    this.longs = RectangularArrays.ReturnRectangularArray<long>(INTERVAL, outerInstance.longsSize);
    this.bytesStart = new int[INTERVAL];
    this.bytesLength = new int[INTERVAL];
    this.docFreq_Renamed = new int[INTERVAL];
    this.totalTermFreq_Renamed = new long[INTERVAL];

    // -1 marks "no block decoded yet".
    this.statsBlockOrd = -1;
    this.metaBlockOrd = -1;

    // Without freqs, totalTermFreq is undefined; pre-fill with the -1 sentinel.
    if (!outerInstance.HasFreqs)
    {
        Arrays.Fill(totalTermFreq_Renamed, -1);
    }
}
/// <summary>
/// Called when we are done adding docs to this term: records the freq/prox
/// start file pointers on the term state, flushes the skip list when the
/// term has enough docs, and resets per-term counters.
/// </summary>
public override void FinishTerm(BlockTermState _state)
{
    StandardTermState state = (StandardTermState)_state;
    Debug.Assert(state.DocFreq > 0);
    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    Debug.Assert(state.DocFreq == Df);

    state.FreqStart = FreqStart;
    state.ProxStart = ProxStart;
    // Only terms with >= SkipMinimum docs carry a skip list; -1 means "none".
    state.SkipOffset = Df >= SkipMinimum
        ? SkipListWriter.WriteSkip(FreqOut) - FreqStart
        : -1;

    // Reset for the next term.
    LastDocID = 0;
    Df = 0;
}
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
{
    SepTermState termState_ = (SepTermState)termState;

    // Reuse only when the incoming enum is ours AND reads from our docIn.
    // With ParallelReader a reused enum may have come from a different
    // reader that also uses the sep codec, so the stream check is required.
    SepDocsEnum docsEnum = reuse as SepDocsEnum;
    if (docsEnum == null || docsEnum.startDocIn != docIn)
    {
        docsEnum = new SepDocsEnum(this);
    }

    return docsEnum.Init(fieldInfo, termState_, liveDocs);
}
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags)
{
    // Reuse the caller's enum only when it is ours and can be re-pointed at
    // this reader's doc stream/field; otherwise allocate a fresh one.
    var docsEnum = reuse as BlockDocsEnum;
    if (docsEnum == null || !docsEnum.CanReuse(DocIn, fieldInfo))
    {
        docsEnum = new BlockDocsEnum(this, fieldInfo);
    }
    return docsEnum.Reset(liveDocs, (Lucene41PostingsWriter.IntBlockTermState)termState, flags);
}
/// <summary>
/// Decodes one term's postings pointers from the in-memory metadata stream:
/// a delta-coded freq offset, an optional skip offset (only when the term's
/// doc freq reaches <c>SkipMinimum</c>), and, for fields with positions, a
/// delta-coded prox offset. When <paramref name="absolute"/> is true the
/// running offsets are reset so the deltas decode from zero.
/// </summary>
public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState _termState, bool absolute)
{
    StandardTermState termState = (StandardTermState)_termState;
    // NOTE: removed unused local `isFirstTerm` (leftover from the Java port;
    // nothing in this method read it).
    if (absolute)
    {
        termState.FreqOffset = 0;
        termState.ProxOffset = 0;
    }

    termState.FreqOffset += @in.ReadVLong();
    Debug.Assert(termState.FreqOffset < FreqIn.Length());

    if (termState.DocFreq >= SkipMinimum)
    {
        termState.SkipOffset = @in.ReadVLong();
        Debug.Assert(termState.FreqOffset + termState.SkipOffset < FreqIn.Length());
    }
    // else: SkipOffset is undefined for low-doc-freq terms (writer never
    // emitted one), so it is deliberately left untouched.

    if (fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        termState.ProxOffset += @in.ReadVLong();
    }
}
/// <summary>
/// Serializes one term's postings pointers: file-pointer deltas go into
/// <paramref name="longs"/>, variable-width extras (singleton doc id, last
/// pos block offset, skip offset) go to <paramref name="@out"/>. When
/// <paramref name="absolute"/> is true, deltas are taken against the empty
/// state so they decode as absolute values.
/// </summary>
public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute)
{
    var intState = (Int32BlockTermState)state;
    if (absolute)
    {
        lastState = emptyState;
    }

    // Monotonic file pointers are delta-coded against the previous term.
    longs[0] = intState.docStartFP - lastState.docStartFP;
    if (fieldHasPositions)
    {
        longs[1] = intState.posStartFP - lastState.posStartFP;
        if (fieldHasPayloads || fieldHasOffsets)
        {
            longs[2] = intState.payStartFP - lastState.payStartFP;
        }
    }

    // Optional fields use -1 as the "absent" sentinel and are simply omitted.
    if (intState.singletonDocID != -1)
    {
        @out.WriteVInt32(intState.singletonDocID);
    }
    if (fieldHasPositions && intState.lastPosBlockOffset != -1)
    {
        @out.WriteVInt64(intState.lastPosBlockOffset);
    }
    if (intState.skipOffset != -1)
    {
        @out.WriteVInt64(intState.skipOffset);
    }

    lastState = intState;
}
// Positions a private clone of the terms input at this field's start and
// wires up the index enum and reusable per-term decode buffers.
public SegmentTermsEnum(FieldReader fieldReader, BlockTermsReader blockTermsReader)
{
    _fieldReader = fieldReader;
    _blockTermsReader = blockTermsReader;

    // Clone so this enum can seek independently of other enums on the file.
    _input = (IndexInput)_blockTermsReader._input.Clone();
    _input.Seek(_fieldReader._termsStartPointer);

    _indexEnum = _blockTermsReader._indexReader.GetFieldEnum(_fieldReader._fieldInfo);
    _doOrd = _blockTermsReader._indexReader.SupportsOrd;
    _fieldTerm.Field = _fieldReader._fieldInfo.Name;

    _state = _blockTermsReader._postingsReader.NewTermState();
    _state.TotalTermFreq = -1;  // unknown until a term is decoded
    _state.Ord = -1;

    // Initial capacities; grown on demand while scanning blocks.
    _termSuffixes = new byte[128];
    _docFreqBytes = new byte[64];
    _longs = new long[_fieldReader._longsSize];
}
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsEnum reuse, int flags)
{
    var termState = (SepTermState)bTermState;

    // If you are using ParallelReader, a reused enum could have come from
    // another reader also using the sep codec, so only reuse when it reads
    // from our doc stream.
    var docsEnum = reuse as SepDocsEnum;
    if (docsEnum == null || docsEnum.START_DOC_IN != _docIn)
    {
        docsEnum = new SepDocsEnum(this);
    }

    return docsEnum.Init(fieldInfo, termState, liveDocs);
}
/// <summary>
/// Called when we are done adding docs to this term. Flushes any buffered
/// doc deltas/freqs and positions (vInt-encoded tail that did not fill a
/// full block), optionally writes the skip data, then records all resulting
/// file pointers/offsets on <paramref name="_state"/> and resets per-term
/// buffers. Terms with docFreq == 1 are "pulsed": the single doc id is
/// stored inline in the term dictionary instead of the .doc file.
/// </summary>
public override void FinishTerm(BlockTermState _state)
{
    IntBlockTermState state = (IntBlockTermState)_state;
    Debug.Assert(state.DocFreq > 0);
    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    Debug.Assert(state.DocFreq == DocCount, state.DocFreq + " vs " + DocCount);
    // docFreq == 1: don't write the single docid/freq to a separate file;
    // the singleton doc id goes into the term dictionary and freq is
    // implicitly totalTermFreq.
    int singletonDocID;
    if (state.DocFreq == 1)
    {
        singletonDocID = DocDeltaBuffer[0];
    }
    else
    {
        singletonDocID = -1;
        // vInt-encode the remaining (partial-block) doc deltas and freqs.
        // Freq 1 is folded into the delta's low bit to save a byte.
        for (int i = 0; i < DocBufferUpto; i++)
        {
            int docDelta = DocDeltaBuffer[i];
            int freq = FreqBuffer[i];
            if (!FieldHasFreqs)
            {
                DocOut.WriteVInt(docDelta);
            }
            else if (FreqBuffer[i] == 1)
            {
                DocOut.WriteVInt((docDelta << 1) | 1);
            }
            else
            {
                DocOut.WriteVInt(docDelta << 1);
                DocOut.WriteVInt(freq);
            }
        }
    }
    long lastPosBlockOffset;
    if (FieldHasPositions)
    {
        // totalTermFreq is the total number of positions (or payloads, or
        // offsets) associated with the current term.
        Debug.Assert(state.TotalTermFreq != -1);
        if (state.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
        {
            // Record file offset for the last position in the last full block,
            // so readers can jump straight to the vInt-encoded tail.
            lastPosBlockOffset = PosOut.FilePointer - PosStartFP;
        }
        else
        {
            lastPosBlockOffset = -1;
        }
        if (PosBufferUpto > 0)
        {
            // TODO: should we send offsets/payloads to
            // .pay...? seems wasteful (have to store extra
            // vLong for low (< BLOCK_SIZE) DF terms = vast vast
            // majority)

            // vInt-encode the remaining positions/payloads/offsets.
            // Payload/offset lengths are run-length-ish coded: the low bit of
            // the delta flags "a new length follows".
            int lastPayloadLength = -1; // force first payload length to be written
            int lastOffsetLength = -1; // force first offset length to be written
            int payloadBytesReadUpto = 0;
            for (int i = 0; i < PosBufferUpto; i++)
            {
                int posDelta = PosDeltaBuffer[i];
                if (FieldHasPayloads)
                {
                    int payloadLength = PayloadLengthBuffer[i];
                    if (payloadLength != lastPayloadLength)
                    {
                        lastPayloadLength = payloadLength;
                        PosOut.WriteVInt((posDelta << 1) | 1);
                        PosOut.WriteVInt(payloadLength);
                    }
                    else
                    {
                        PosOut.WriteVInt(posDelta << 1);
                    }
                    if (payloadLength != 0)
                    {
                        PosOut.WriteBytes(PayloadBytes, payloadBytesReadUpto, payloadLength);
                        payloadBytesReadUpto += payloadLength;
                    }
                }
                else
                {
                    PosOut.WriteVInt(posDelta);
                }
                if (FieldHasOffsets)
                {
                    int delta = OffsetStartDeltaBuffer[i];
                    int length = OffsetLengthBuffer[i];
                    if (length == lastOffsetLength)
                    {
                        PosOut.WriteVInt(delta << 1);
                    }
                    else
                    {
                        PosOut.WriteVInt(delta << 1 | 1);
                        PosOut.WriteVInt(length);
                        lastOffsetLength = length;
                    }
                }
            }
            if (FieldHasPayloads)
            {
                // All buffered payload bytes must have been consumed.
                Debug.Assert(payloadBytesReadUpto == PayloadByteUpto);
                PayloadByteUpto = 0;
            }
        }
    }
    else
    {
        lastPosBlockOffset = -1;
    }
    // Skip data only exists when the term spans more than one doc block.
    long skipOffset;
    if (DocCount > Lucene41PostingsFormat.BLOCK_SIZE)
    {
        skipOffset = SkipWriter.WriteSkip(DocOut) - DocStartFP;
    }
    else
    {
        skipOffset = -1;
    }
    // Publish everything the reader needs onto the term state.
    state.DocStartFP = DocStartFP;
    state.PosStartFP = PosStartFP;
    state.PayStartFP = PayStartFP;
    state.SingletonDocID = singletonDocID;
    state.SkipOffset = skipOffset;
    state.LastPosBlockOffset = lastPosBlockOffset;
    // Reset per-term accumulators for the next term.
    DocBufferUpto = 0;
    PosBufferUpto = 0;
    LastDocID = 0;
    DocCount = 0;
}
// Serializes the buffered block of pending terms in four passes:
//   1) compute the common prefix shared by all terms in the block (relative
//      to the last term of the previous block),
//   2) write each term's suffix bytes,
//   3) write each term's stats (docFreq, and totalTermFreq delta when the
//      field has freqs),
//   4) write each term's postings metadata via the postings writer.
// Passes 2-4 are each length-prefixed blobs staged through _bytesWriter.
private void FlushBlock()
{
    // First pass: compute common prefix for all terms
    // in the block, against term before first term in
    // this block:
    int commonPrefix = SharedPrefix(_lastPrevTerm, _pendingTerms[0].Term);
    for (int termCount = 1; termCount < _pendingCount; termCount++)
    {
        commonPrefix = Math.Min(commonPrefix,
            SharedPrefix(_lastPrevTerm, _pendingTerms[termCount].Term));
    }

    _btw._output.WriteVInt(_pendingCount);
    _btw._output.WriteVInt(commonPrefix);

    // 2nd pass: write suffixes, as separate byte[] blob.
    for (var termCount = 0; termCount < _pendingCount; termCount++)
    {
        var suffix = _pendingTerms[termCount].Term.Length - commonPrefix;
        // TODO: cutover to better intblock codec, instead
        // of interleaving here:
        _bytesWriter.WriteVInt(suffix);
        _bytesWriter.WriteBytes(_pendingTerms[termCount].Term.Bytes, commonPrefix, suffix);
    }
    _btw._output.WriteVInt((int)_bytesWriter.FilePointer);
    _bytesWriter.WriteTo(_btw._output);
    _bytesWriter.Reset();

    // 3rd pass: write the freqs as byte[] blob.
    // TODO: cutover to better intblock codec. simple64?
    for (int termCount = 0; termCount < _pendingCount; termCount++)
    {
        BlockTermState state = _pendingTerms[termCount].State;
        Debug.Assert(state != null);
        _bytesWriter.WriteVInt(state.DocFreq);
        if (_fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
        {
            // totalTermFreq >= docFreq always, so store the (smaller) delta.
            _bytesWriter.WriteVLong(state.TotalTermFreq - state.DocFreq);
        }
    }
    _btw._output.WriteVInt((int)_bytesWriter.FilePointer);
    _bytesWriter.WriteTo(_btw._output);
    _bytesWriter.Reset();

    // 4th pass: write the metadata. The first term is encoded with absolute
    // pointers; subsequent terms are deltas against the previous term.
    var longs = new long[_longsSize];
    bool absolute = true;
    for (int termCount = 0; termCount < _pendingCount; termCount++)
    {
        BlockTermState state = _pendingTerms[termCount].State;
        _postingsWriter.EncodeTerm(longs, _bufferWriter, _fieldInfo, state, absolute);
        for (int i = 0; i < _longsSize; i++)
        {
            _bytesWriter.WriteVLong(longs[i]);
        }
        _bufferWriter.WriteTo(_bytesWriter);
        _bufferWriter.Reset();
        absolute = false;
    }
    _btw._output.WriteVInt((int)_bytesWriter.FilePointer);
    _bytesWriter.WriteTo(_btw._output);
    _bytesWriter.Reset();

    // Remember the last term written so the next block's common prefix is
    // computed against it, then clear the pending buffer.
    _lastPrevTerm.CopyBytes(_pendingTerms[_pendingCount - 1].Term);
    _pendingCount = 0;
}
/// <summary>
/// Writes one term's sep-codec metadata: doc/freq/pos index deltas, the
/// payload file pointer (when payloads are stored), and the skip file
/// pointer (when present). Absolute encoding resets the running deltas.
/// </summary>
public override void EncodeTerm(long[] longs, DataOutput output, FieldInfo fi, BlockTermState bstate, bool absolute)
{
    var state = (SepTermState)bstate;
    if (absolute)
    {
        LAST_SKIP_FP = 0;
        LAST_PAYLOAD_FP = 0;
        _lastState = state;
    }

    _lastState.DocIndex.CopyFrom(state.DocIndex, false);
    _lastState.DocIndex.Write(output, absolute);

    if (INDEX_OPTIONS != FieldInfo.IndexOptions.DOCS_ONLY)
    {
        _lastState.FreqIndex.CopyFrom(state.FreqIndex, false);
        _lastState.FreqIndex.Write(output, absolute);

        if (INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            _lastState.PosIndex.CopyFrom(state.PosIndex, false);
            _lastState.PosIndex.Write(output, absolute);

            if (STORE_PAYLOADS)
            {
                // Delta-code the payload fp against the previous term.
                output.WriteVLong(absolute ? state.PayloadFp : state.PayloadFp - LAST_PAYLOAD_FP);
                LAST_PAYLOAD_FP = state.PayloadFp;
            }
        }
    }

    // -1 means this term has no skip data; nothing more to write.
    if (state.SkipFp == -1)
    {
        return;
    }
    output.WriteVLong(absolute ? state.SkipFp : state.SkipFp - LAST_SKIP_FP);
    LAST_SKIP_FP = state.SkipFp;
}
/// <summary>
/// Encodes one pulsed term. Inlined terms (BYTES != null) write their blob
/// length + bytes directly; non-inlined terms delegate to the wrapped
/// postings writer and flush its metadata longs and buffered bytes.
/// </summary>
public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState state, bool abs)
{
    var pulsingState = (PulsingTermState)state;
    // The pulsing format advertises zero metadata longs of its own.
    Debug.Assert(empty.Length == 0);

    // Once any term in the block requires absolute encoding, stay absolute
    // until the wrapped writer has emitted an absolute entry.
    _absolute = _absolute || abs;
    if (pulsingState.BYTES == null)
    {
        _wrappedPostingsWriter.EncodeTerm(_longs, _buffer, fieldInfo, pulsingState.WRAPPED_STATE, _absolute);
        for (var i = 0; i < _longsSize; i++)
        {
            output.WriteVLong(_longs[i]);
        }
        _buffer.WriteTo(output);
        _buffer.Reset();
        _absolute = false;
    }
    else
    {
        output.WriteVInt(pulsingState.BYTES.Length);
        output.WriteBytes(pulsingState.BYTES, 0, pulsingState.BYTES.Length);
        _absolute = _absolute || abs;
    }
}
/// <summary>
/// Called when we are done adding docs to this term. If the term exceeded
/// the pulsing threshold (_pendingCount == -1) its postings were already
/// flushed to the wrapped writer and only the wrapped state is finished.
/// Otherwise the buffered postings are vInt-encoded inline into
/// <c>state.BYTES</c>, which the terms dictionary stores directly.
/// </summary>
/// <param name="_state">Term state to finish; must be a PulsingTermState.</param>
public override void FinishTerm(BlockTermState _state)
{
    var state = (PulsingTermState) _state;
    // -1 == "overflowed to wrapped writer"; otherwise at least one posting buffered.
    Debug.Assert(_pendingCount > 0 || _pendingCount == -1);
    if (_pendingCount == -1)
    {
        state.WRAPPED_STATE.DocFreq = state.DocFreq;
        state.WRAPPED_STATE.TotalTermFreq = state.TotalTermFreq;
        state.BYTES = null;
        _wrappedPostingsWriter.FinishTerm(state.WRAPPED_STATE);
    }
    else
    {
        // There were few enough total occurrences for this
        // term, so we fully inline our postings data into
        // terms dict, now:

        // TODO: it'd be better to share this encoding logic
        // in some inner codec that knows how to write a
        // single doc / single position, etc.  This way if a
        // given codec wants to store other interesting
        // stuff, it could use this pulsing codec to do so

        if (_indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            // _pending interleaves one entry per doc followed by one entry
            // per position of that doc; doc deltas and freq share a vInt
            // (low bit set == freq 1 implied).
            var lastDocID = 0;
            var pendingIDX = 0;
            var lastPayloadLength = -1;
            var lastOffsetLength = -1;
            while (pendingIDX < _pendingCount)
            {
                var doc = _pending[pendingIDX];

                var delta = doc.docID - lastDocID;
                lastDocID = doc.docID;

                if (doc.termFreq == 1)
                {
                    _buffer.WriteVInt((delta << 1) | 1);
                }
                else
                {
                    _buffer.WriteVInt(delta << 1);
                    _buffer.WriteVInt(doc.termFreq);
                }

                var lastPos = 0;
                var lastOffset = 0;
                for (var posIDX = 0; posIDX < doc.termFreq; posIDX++)
                {
                    var pos = _pending[pendingIDX++];
                    Debug.Assert(pos.docID == doc.docID);
                    var posDelta = pos.pos - lastPos;
                    lastPos = pos.pos;
                    var payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                    if (_storePayloads)
                    {
                        // Low bit of the pos delta flags "new payload length follows".
                        if (payloadLength != lastPayloadLength)
                        {
                            _buffer.WriteVInt((posDelta << 1) | 1);
                            _buffer.WriteVInt(payloadLength);
                            lastPayloadLength = payloadLength;
                        }
                        else
                        {
                            _buffer.WriteVInt(posDelta << 1);
                        }
                    }
                    else
                    {
                        _buffer.WriteVInt(posDelta);
                    }

                    if (_indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                    {
                        // Offsets use the same low-bit "new length follows" scheme.
                        var offsetDelta = pos.startOffset - lastOffset;
                        var offsetLength = pos.endOffset - pos.startOffset;
                        if (offsetLength != lastOffsetLength)
                        {
                            _buffer.WriteVInt(offsetDelta << 1 | 1);
                            _buffer.WriteVInt(offsetLength);
                        }
                        else
                        {
                            _buffer.WriteVInt(offsetDelta << 1);
                        }
                        lastOffset = pos.startOffset;
                        lastOffsetLength = offsetLength;
                    }

                    if (payloadLength > 0)
                    {
                        Debug.Assert(_storePayloads);
                        _buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                    }
                }
            }
        }
        else switch (_indexOptions)
        {
            case FieldInfo.IndexOptions.DOCS_AND_FREQS:
            {
                // One entry per doc: delta-coded docID with freq folded into
                // the low bit when freq == 1.
                var lastDocId = 0;
                for (var posIdx = 0; posIdx < _pendingCount; posIdx++)
                {
                    var doc = _pending[posIdx];
                    var delta = doc.docID - lastDocId;
                    Debug.Assert(doc.termFreq != 0);
                    if (doc.termFreq == 1)
                    {
                        _buffer.WriteVInt((delta << 1) | 1);
                    }
                    else
                    {
                        _buffer.WriteVInt(delta << 1);
                        _buffer.WriteVInt(doc.termFreq);
                    }
                    lastDocId = doc.docID;
                }
            }
                break;
            case FieldInfo.IndexOptions.DOCS_ONLY:
            {
                // One plain delta-coded docID per doc.
                var lastDocId = 0;
                for (var posIdx = 0; posIdx < _pendingCount; posIdx++)
                {
                    var doc = _pending[posIdx];
                    _buffer.WriteVInt(doc.docID - lastDocId);
                    lastDocId = doc.docID;
                }
            }
                break;
        }

        // Snapshot the encoded blob onto the term state.
        state.BYTES = new byte[(int) _buffer.FilePointer];
        _buffer.WriteTo(state.BYTES, 0);
        _buffer.Reset();
    }
    _pendingCount = 0;
}
/// <summary>
/// Called when we are done adding docs to this term: snapshots the current
/// doc/freq/pos stream indexes onto the term state, records skip and
/// payload file pointers, and resets the per-term counters.
/// </summary>
public override void FinishTerm(BlockTermState bstate)
{
    var state = (SepTermState)bstate;

    // TODO: -- wasteful we are counting this in two places?
    Debug.Assert(state.DocFreq > 0);
    Debug.Assert(state.DocFreq == DF);

    state.DocIndex = DOC_OUT.Index();
    state.DocIndex.CopyFrom(DOC_INDEX, false);

    if (INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_ONLY)
    {
        // No freq/pos streams for docs-only fields.
        state.FreqIndex = null;
        state.PosIndex = null;
    }
    else
    {
        state.FreqIndex = FREQ_OUT.Index();
        state.FreqIndex.CopyFrom(FREQ_INDEX, false);
        if (INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            state.PosIndex = POS_OUT.Index();
            state.PosIndex.CopyFrom(POS_INDEX, false);
        }
        else
        {
            state.PosIndex = null;
        }
    }

    // Skip data only written for terms with enough docs; -1 == absent.
    if (DF >= SKIP_MINIMUM)
    {
        state.SkipFp = SKIP_OUT.FilePointer;
        SKIP_LIST_WRITER.WriteSkip(SKIP_OUT);
    }
    else
    {
        state.SkipFp = -1;
    }

    state.PayloadFp = PAYLOAD_START;

    LAST_DOC_ID = 0;
    DF = 0;
}
/// <summary>
/// Writes one term's sep-codec metadata to <paramref name="output"/>:
/// doc index, then (per index options) freq and pos indexes, the payload
/// file pointer when payloads are stored, and finally the skip file pointer
/// when the term has skip data. Non-absolute values are delta-coded.
/// </summary>
public override void EncodeTerm(long[] longs, DataOutput output, FieldInfo fi, BlockTermState bstate, bool absolute)
{
    var state = (SepTermState)bstate;
    if (absolute)
    {
        // Reset running deltas so this entry decodes standalone.
        LAST_SKIP_FP = 0;
        LAST_PAYLOAD_FP = 0;
        _lastState = state;
    }

    _lastState.DocIndex.CopyFrom(state.DocIndex, false);
    _lastState.DocIndex.Write(output, absolute);

    if (INDEX_OPTIONS != FieldInfo.IndexOptions.DOCS_ONLY)
    {
        _lastState.FreqIndex.CopyFrom(state.FreqIndex, false);
        _lastState.FreqIndex.Write(output, absolute);

        if (INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            _lastState.PosIndex.CopyFrom(state.PosIndex, false);
            _lastState.PosIndex.Write(output, absolute);

            if (STORE_PAYLOADS)
            {
                if (absolute)
                {
                    output.WriteVLong(state.PayloadFp);
                }
                else
                {
                    output.WriteVLong(state.PayloadFp - LAST_PAYLOAD_FP);
                }
                LAST_PAYLOAD_FP = state.PayloadFp;
            }
        }
    }

    if (state.SkipFp == -1)
    {
        return; // no skip data for this term
    }

    if (absolute)
    {
        output.WriteVLong(state.SkipFp);
    }
    else
    {
        output.WriteVLong(state.SkipFp - LAST_SKIP_FP);
    }
    LAST_SKIP_FP = state.SkipFp;
}
// TODO: specialize to liveDocs vs not
/// <summary>
/// Returns a positions enum for one term. The cheaper BlockDocsAndPositionsEnum
/// is used when the caller asked for neither offsets nor payloads (or the index
/// doesn't store them); otherwise the full EverythingEnum is used.
/// </summary>
public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
{
    bool indexHasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    bool indexHasPayloads = fieldInfo.HasPayloads();

    // Something is "needed" only when the index stores it AND the caller
    // requested it via flags.
    bool needsOffsets = indexHasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0;
    bool needsPayloads = indexHasPayloads && (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0;

    if (!needsOffsets && !needsPayloads)
    {
        var docsAndPositionsEnum = reuse as BlockDocsAndPositionsEnum;
        if (docsAndPositionsEnum == null || !docsAndPositionsEnum.CanReuse(DocIn, fieldInfo))
        {
            docsAndPositionsEnum = new BlockDocsAndPositionsEnum(this, fieldInfo);
        }
        return docsAndPositionsEnum.Reset(liveDocs, (Lucene41PostingsWriter.IntBlockTermState)termState);
    }

    var everythingEnum = reuse as EverythingEnum;
    if (everythingEnum == null || !everythingEnum.CanReuse(DocIn, fieldInfo))
    {
        everythingEnum = new EverythingEnum(this, fieldInfo);
    }
    return everythingEnum.Reset(liveDocs, (Lucene41PostingsWriter.IntBlockTermState)termState, flags);
}
/// <summary>
/// Writes one term's sep-codec metadata (doc/freq/pos indexes, payload and
/// skip file pointers). Absolute encoding resets the running deltas so the
/// entry can be decoded without a predecessor.
/// </summary>
public override void EncodeTerm(long[] longs, DataOutput output, FieldInfo fi, BlockTermState state, bool absolute)
{
    SepTermState sepState = (SepTermState)state;
    if (absolute)
    {
        lastSkipFP = 0;
        lastPayloadFP = 0;
        lastState = sepState;
    }

    lastState.DocIndex.CopyFrom(sepState.DocIndex, false);
    lastState.DocIndex.Write(output, absolute);

    if (indexOptions != IndexOptions.DOCS_ONLY)
    {
        lastState.FreqIndex.CopyFrom(sepState.FreqIndex, false);
        lastState.FreqIndex.Write(output, absolute);

        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            lastState.PosIndex.CopyFrom(sepState.PosIndex, false);
            lastState.PosIndex.Write(output, absolute);

            if (storePayloads)
            {
                // Delta-code the payload fp against the previous term.
                output.WriteVInt64(absolute ? sepState.PayloadFP : sepState.PayloadFP - lastPayloadFP);
                lastPayloadFP = sepState.PayloadFP;
            }
        }
    }

    // -1 == term has no skip data; omit entirely.
    if (sepState.SkipFP != -1)
    {
        output.WriteVInt64(absolute ? sepState.SkipFP : sepState.SkipFP - lastSkipFP);
        lastSkipFP = sepState.SkipFP;
    }
}
/// <summary>
/// Called when we are done adding docs to this term. If the term overflowed
/// the pulsing threshold (pendingCount == -1), its postings already went to
/// the wrapped writer and only the wrapped state is finished here. Otherwise
/// the buffered postings are vInt-encoded inline into <c>state.bytes</c>.
/// </summary>
/// <param name="_state">Term state to finish; must be a PulsingTermState.</param>
public override void FinishTerm(BlockTermState _state)
{
    PulsingTermState state = (PulsingTermState) _state;
    // FIX: was `Debug.Debug.Assert((...)` (double Debug + unbalanced parens)
    // — did not compile; intent is a plain assertion.
    Debug.Assert(pendingCount > 0 || pendingCount == -1);

    if (pendingCount == -1)
    {
        state.wrappedState.DocFreq = state.DocFreq;
        state.wrappedState.TotalTermFreq = state.TotalTermFreq;
        state.bytes = null;
        _wrappedPostingsWriter.FinishTerm(state.wrappedState);
    }
    else
    {
        // There were few enough total occurrences for this
        // term, so we fully inline our postings data into
        // terms dict, now:

        // TODO: it'd be better to share this encoding logic
        // in some inner codec that knows how to write a
        // single doc / single position, etc.  This way if a
        // given codec wants to store other interesting
        // stuff, it could use this pulsing codec to do so

        if (indexOptions.Value.CompareTo(FieldInfo.IndexOptions_e.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
        {
            // `pending` interleaves a doc entry followed by its positions;
            // freq 1 is folded into the low bit of the doc delta.
            int lastDocID = 0;
            int pendingIDX = 0;
            int lastPayloadLength = -1;
            int lastOffsetLength = -1;
            while (pendingIDX < pendingCount)
            {
                Position doc = pending[pendingIDX];

                int delta = doc.docID - lastDocID;
                lastDocID = doc.docID;

                if (doc.termFreq == 1)
                {
                    buffer.WriteVInt((delta << 1) | 1);
                }
                else
                {
                    buffer.WriteVInt(delta << 1);
                    buffer.WriteVInt(doc.termFreq);
                }

                int lastPos = 0;
                int lastOffset = 0;
                for (int posIDX = 0; posIDX < doc.termFreq; posIDX++)
                {
                    Position pos = pending[pendingIDX++];
                    // FIX: was `Debug.Debug.Assert((...)`.
                    Debug.Assert(pos.docID == doc.docID);
                    int posDelta = pos.pos - lastPos;
                    lastPos = pos.pos;
                    int payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                    if (storePayloads)
                    {
                        // Low bit of pos delta flags "new payload length follows".
                        if (payloadLength != lastPayloadLength)
                        {
                            buffer.WriteVInt((posDelta << 1) | 1);
                            buffer.WriteVInt(payloadLength);
                            lastPayloadLength = payloadLength;
                        }
                        else
                        {
                            buffer.WriteVInt(posDelta << 1);
                        }
                    }
                    else
                    {
                        buffer.WriteVInt(posDelta);
                    }

                    if (indexOptions.Value.CompareTo(FieldInfo.IndexOptions_e.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
                    {
                        // Offsets use the same low-bit "new length follows" scheme.
                        int offsetDelta = pos.startOffset - lastOffset;
                        int offsetLength = pos.endOffset - pos.startOffset;
                        if (offsetLength != lastOffsetLength)
                        {
                            buffer.WriteVInt(offsetDelta << 1 | 1);
                            buffer.WriteVInt(offsetLength);
                        }
                        else
                        {
                            buffer.WriteVInt(offsetDelta << 1);
                        }
                        lastOffset = pos.startOffset;
                        lastOffsetLength = offsetLength;
                    }

                    if (payloadLength > 0)
                    {
                        // FIX: was `Debug.Debug.Assert((storePayloads);`.
                        Debug.Assert(storePayloads);
                        buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                    }
                }
            }
        }
        else if (indexOptions == FieldInfo.IndexOptions_e.DOCS_AND_FREQS)
        {
            int lastDocID = 0;
            for (int posIDX = 0; posIDX < pendingCount; posIDX++)
            {
                Position doc = pending[posIDX];
                int delta = doc.docID - lastDocID;
                // FIX: was `Debug.Debug.Assert((doc.termFreq != 0);`.
                Debug.Assert(doc.termFreq != 0);
                if (doc.termFreq == 1)
                {
                    buffer.WriteVInt((delta << 1) | 1);
                }
                else
                {
                    buffer.WriteVInt(delta << 1);
                    buffer.WriteVInt(doc.termFreq);
                }
                lastDocID = doc.docID;
            }
        }
        else if (indexOptions == FieldInfo.IndexOptions_e.DOCS_ONLY)
        {
            int lastDocID = 0;
            for (int posIDX = 0; posIDX < pendingCount; posIDX++)
            {
                Position doc = pending[posIDX];
                buffer.WriteVInt(doc.docID - lastDocID);
                lastDocID = doc.docID;
            }
        }

        // Snapshot the encoded blob onto the term state.
        state.bytes = new byte[(int) buffer.FilePointer];
        buffer.WriteTo((sbyte[])(Array)state.bytes, 0);
        buffer.Reset();
    }
    pendingCount = 0;
}
/// <summary>
/// Called when we are done adding docs to this term. Overflowed terms
/// (_pendingCount == -1) delegate to the wrapped writer; otherwise the
/// buffered postings are vInt-encoded inline into <c>state2.bytes</c> for
/// storage directly in the terms dictionary.
/// </summary>
/// <param name="state">Term state to finish; must be a PulsingTermState.</param>
public override void FinishTerm(BlockTermState state)
{
    var state2 = (PulsingTermState)state;
    // -1 == "overflowed to wrapped writer"; otherwise at least one posting buffered.
    Debug.Assert(_pendingCount > 0 || _pendingCount == -1);

    if (_pendingCount == -1)
    {
        state2.wrappedState.DocFreq = state2.DocFreq;
        state2.wrappedState.TotalTermFreq = state2.TotalTermFreq;
        state2.bytes = null;
        _wrappedPostingsWriter.FinishTerm(state2.wrappedState);
    }
    else
    {
        // There were few enough total occurrences for this
        // term, so we fully inline our postings data into
        // terms dict, now:

        // TODO: it'd be better to share this encoding logic
        // in some inner codec that knows how to write a
        // single doc / single position, etc.  This way if a
        // given codec wants to store other interesting
        // stuff, it could use this pulsing codec to do so

        if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
        {
            // _pending interleaves a doc entry followed by its positions;
            // freq 1 is folded into the low bit of the doc delta.
            var lastDocID = 0;
            var pendingIDX = 0;
            var lastPayloadLength = -1;
            var lastOffsetLength = -1;
            while (pendingIDX < _pendingCount)
            {
                var doc = _pending[pendingIDX];

                var delta = doc.docID - lastDocID;
                lastDocID = doc.docID;

                if (doc.termFreq == 1)
                {
                    _buffer.WriteVInt32((delta << 1) | 1);
                }
                else
                {
                    _buffer.WriteVInt32(delta << 1);
                    _buffer.WriteVInt32(doc.termFreq);
                }

                var lastPos = 0;
                var lastOffset = 0;
                for (var posIDX = 0; posIDX < doc.termFreq; posIDX++)
                {
                    var pos = _pending[pendingIDX++];
                    Debug.Assert(pos.docID == doc.docID);
                    var posDelta = pos.pos - lastPos;
                    lastPos = pos.pos;
                    var payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                    if (_storePayloads)
                    {
                        // Low bit of pos delta flags "new payload length follows".
                        if (payloadLength != lastPayloadLength)
                        {
                            _buffer.WriteVInt32((posDelta << 1) | 1);
                            _buffer.WriteVInt32(payloadLength);
                            lastPayloadLength = payloadLength;
                        }
                        else
                        {
                            _buffer.WriteVInt32(posDelta << 1);
                        }
                    }
                    else
                    {
                        _buffer.WriteVInt32(posDelta);
                    }

                    if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
                    {
                        // Offsets use the same low-bit "new length follows" scheme.
                        var offsetDelta = pos.startOffset - lastOffset;
                        var offsetLength = pos.endOffset - pos.startOffset;
                        if (offsetLength != lastOffsetLength)
                        {
                            _buffer.WriteVInt32(offsetDelta << 1 | 1);
                            _buffer.WriteVInt32(offsetLength);
                        }
                        else
                        {
                            _buffer.WriteVInt32(offsetDelta << 1);
                        }
                        lastOffset = pos.startOffset;
                        lastOffsetLength = offsetLength;
                    }

                    if (payloadLength > 0)
                    {
                        Debug.Assert(_storePayloads);
                        _buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                    }
                }
            }
        }
        else if (_indexOptions == IndexOptions.DOCS_AND_FREQS)
        {
            // One entry per doc: delta-coded docID, freq folded into low bit.
            int lastDocId = 0;
            for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
            {
                Position doc = _pending[posIdx];
                int delta = doc.docID - lastDocId;
                Debug.Assert(doc.termFreq != 0);
                if (doc.termFreq == 1)
                {
                    _buffer.WriteVInt32((delta << 1) | 1);
                }
                else
                {
                    _buffer.WriteVInt32(delta << 1);
                    _buffer.WriteVInt32(doc.termFreq);
                }
                lastDocId = doc.docID;
            }
        }
        else if (_indexOptions == IndexOptions.DOCS_ONLY)
        {
            // One plain delta-coded docID per doc.
            int lastDocId = 0;
            for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
            {
                Position doc = _pending[posIdx];
                _buffer.WriteVInt32(doc.docID - lastDocId);
                lastDocId = doc.docID;
            }
        }

        // Snapshot the encoded blob onto the term state.
        state2.bytes = new byte[(int)_buffer.GetFilePointer()];
        _buffer.WriteTo(state2.bytes, 0);
        _buffer.Reset();
    }
    _pendingCount = 0;
}
/// <summary>
/// Encodes one pulsed term. Inlined terms (bytes != null) write the blob
/// length + bytes directly to <paramref name="output"/>; non-inlined terms
/// delegate to the wrapped postings writer and flush its metadata longs and
/// buffered bytes.
/// </summary>
public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
{
    PulsingTermState state = (PulsingTermState) _state;
    // FIX: was `Debug.Debug.Assert((empty.Length == 0);` — double `Debug`
    // and an unbalanced paren; did not compile. The pulsing format declares
    // zero metadata longs of its own.
    Debug.Assert(empty.Length == 0);

    // Once any term requires absolute encoding, stay absolute until the
    // wrapped writer has emitted an absolute entry.
    this.absolute = this.absolute || absolute;
    if (state.bytes == null)
    {
        _wrappedPostingsWriter.EncodeTerm(longs, buffer, fieldInfo, state.wrappedState, this.absolute);
        for (int i = 0; i < longsSize; i++)
        {
            output.WriteVLong(longs[i]);
        }
        buffer.WriteTo(output);
        buffer.Reset();
        this.absolute = false;
    }
    else
    {
        output.WriteVInt(state.bytes.Length);
        output.WriteBytes(state.bytes, 0, state.bytes.Length);
        this.absolute = this.absolute || absolute;
    }
}
/// <summary>
/// Decodes one pulsed term. Terms whose occurrence count is at or below the
/// pulsing threshold have their postings blob inlined in the terms dict and
/// are read in raw (decoded lazily when an enum is pulled); larger terms
/// read their metadata longs and delegate to the wrapped postings reader.
/// </summary>
public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState _termState, bool absolute)
{
    var termState = (PulsingTermState)_termState;
    Debug.Assert(empty.Length == 0);
    termState.Absolute = termState.Absolute || absolute;

    // If we have positions, it's the total TF; otherwise it's computed from
    // docFreq. TODO: double check this is right..
    long count = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.CompareTo(fieldInfo.FieldIndexOptions) <= 0
        ? termState.TotalTermFreq
        : termState.DocFreq;

    if (count <= _maxPositions)
    {
        // Inlined into terms dict -- just read the byte[] blob in,
        // but don't decode it now (we only decode when a DocsEnum
        // or D&PEnum is pulled):
        termState.PostingsSize = input.ReadVInt();
        if (termState.Postings == null || termState.Postings.Length < termState.PostingsSize)
        {
            termState.Postings = new byte[ArrayUtil.Oversize(termState.PostingsSize, 1)];
        }
        // TODO: sort of silly to copy from one big byte[]
        // (the blob holding all inlined terms' blobs for
        // current term block) into another byte[] (just the
        // blob for this term)...
        input.ReadBytes(termState.Postings, 0, termState.PostingsSize);
        termState.Absolute = termState.Absolute || absolute;
        return;
    }

    // Not inlined: read wrapped-codec metadata longs, then delegate.
    var longsSize = _fields == null ? 0 : _fields[fieldInfo.Number];
    if (termState.Longs == null)
    {
        termState.Longs = new long[longsSize];
    }
    for (var i = 0; i < longsSize; i++)
    {
        termState.Longs[i] = input.ReadVLong();
    }
    termState.PostingsSize = -1;
    termState.WrappedTermState.DocFreq = termState.DocFreq;
    termState.WrappedTermState.TotalTermFreq = termState.TotalTermFreq;
    _wrappedPostingsReader.DecodeTerm(termState.Longs, input, fieldInfo, termState.WrappedTermState, termState.Absolute);
    termState.Absolute = false;
}
/// <summary>
/// Decodes one pulsed term. Terms whose occurrence count is at or below the
/// pulsing threshold have their postings blob inlined in the terms dict and
/// are read in raw (decoded lazily when an enum is pulled); larger terms
/// read their metadata longs and delegate to the wrapped postings reader.
/// </summary>
public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState _termState, bool absolute)
{
    PulsingTermState termState = (PulsingTermState) _termState;
    // FIX: was `Debug.Debug.Assert((empty.Length == 0);` — double `Debug`
    // and an unbalanced paren; did not compile. The pulsing format declares
    // zero metadata longs of its own.
    Debug.Assert(empty.Length == 0);
    termState.Absolute = termState.Absolute || absolute;

    // If we have positions, it's the total TF; otherwise it's computed from
    // docFreq. TODO: double check this is right..
    long count = FieldInfo.IndexOptions_e.DOCS_AND_FREQS_AND_POSITIONS.CompareTo(fieldInfo.IndexOptions) <= 0
        ? termState.TotalTermFreq
        : termState.DocFreq;

    if (count <= maxPositions)
    {
        // Inlined into terms dict -- just read the byte[] blob in,
        // but don't decode it now (we only decode when a DocsEnum
        // or D&PEnum is pulled):
        termState.PostingsSize = input.ReadVInt();
        if (termState.Postings == null || termState.Postings.Length < termState.PostingsSize)
        {
            termState.Postings = new byte[ArrayUtil.Oversize(termState.PostingsSize, 1)];
        }
        // TODO: sort of silly to copy from one big byte[]
        // (the blob holding all inlined terms' blobs for
        // current term block) into another byte[] (just the
        // blob for this term)...
        input.ReadBytes(termState.Postings, 0, termState.PostingsSize);
        termState.Absolute = termState.Absolute || absolute;
    }
    else
    {
        // Not inlined: read wrapped-codec metadata longs, then delegate.
        int longsSize = fields == null ? 0 : fields[fieldInfo.Number];
        if (termState.Longs == null)
        {
            termState.Longs = new long[longsSize];
        }
        for (int i = 0; i < longsSize; i++)
        {
            termState.Longs[i] = input.ReadVLong();
        }
        termState.PostingsSize = -1;
        termState.WrappedTermState.DocFreq = termState.DocFreq;
        termState.WrappedTermState.TotalTermFreq = termState.TotalTermFreq;
        _wrappedPostingsReader.DecodeTerm(termState.Longs, input, fieldInfo, termState.WrappedTermState, termState.Absolute);
        termState.Absolute = false;
    }
}
/// <summary>
/// Encodes one term's metadata: file-pointer deltas go into <paramref name="longs"/>,
/// optional values (singleton docID, last position block offset, skip offset)
/// are written as byte metadata. Deltas are relative to the previously encoded
/// term unless <paramref name="absolute"/> resets the baseline to the empty state.
/// The write order here must mirror the reader's decode order exactly.
/// </summary>
public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
{
    IntBlockTermState state = (IntBlockTermState)_state;
    if (absolute)
    {
        // First term in block: deltas are taken against the zeroed empty state.
        LastState = EmptyState;
    }
    longs[0] = state.DocStartFP - LastState.DocStartFP;
    if (FieldHasPositions)
    {
        longs[1] = state.PosStartFP - LastState.PosStartFP;
        if (FieldHasPayloads || FieldHasOffsets)
        {
            longs[2] = state.PayStartFP - LastState.PayStartFP;
        }
    }
    if (state.SingletonDocID != -1)
    {
        // docFreq == 1: the single docID is pulsed into the terms dict.
        @out.WriteVInt(state.SingletonDocID);
    }
    if (FieldHasPositions)
    {
        if (state.LastPosBlockOffset != -1)
        {
            @out.WriteVLong(state.LastPosBlockOffset);
        }
    }
    if (state.SkipOffset != -1)
    {
        @out.WriteVLong(state.SkipOffset);
    }
    // Remember this term as the delta baseline for the next one.
    LastState = state;
}
/// <summary>
/// Returns a positions enum for the given term. Inlined ("pulsed") terms are
/// served by a <c>PulsingDocsAndPositionsEnum</c>; everything else is delegated
/// to the wrapped postings reader. The "other" link on reused enums lets a
/// pulsing enum and a wrapped enum be recycled as a pair.
/// </summary>
public override DocsAndPositionsEnum DocsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
{
    PulsingTermState pulsingState = (PulsingTermState) _termState;

    if (pulsingState.PostingsSize == -1)
    {
        // Term was not inlined: delegate to the wrapped reader.
        if (reuse is PulsingDocsAndPositionsEnum)
        {
            // The caller handed us a pulsing enum; recycle its paired wrapped enum.
            DocsAndPositionsEnum wrappedEnum = _wrappedPostingsReader.DocsAndPositions(field, pulsingState.WrappedTermState, liveDocs, (DocsAndPositionsEnum) GetOther(reuse), flags);
            SetOther(wrappedEnum, reuse); // wrappedEnum.other = reuse
            return wrappedEnum;
        }
        return _wrappedPostingsReader.DocsAndPositions(field, pulsingState.WrappedTermState, liveDocs, reuse, flags);
    }

    // Inlined term: reuse the pulsing enum directly, or pull the cached one
    // hanging off the wrapped enum ('reuse' is actually the wrapped enum here).
    PulsingDocsAndPositionsEnum postingsEnum;
    if (reuse is PulsingDocsAndPositionsEnum)
    {
        postingsEnum = (PulsingDocsAndPositionsEnum) reuse;
        if (!postingsEnum.CanReuse(field))
        {
            postingsEnum = new PulsingDocsAndPositionsEnum(field);
        }
    }
    else
    {
        PulsingDocsAndPositionsEnum cached = (PulsingDocsAndPositionsEnum) GetOther(reuse);
        postingsEnum = (cached != null && cached.CanReuse(field))
            ? cached
            : new PulsingDocsAndPositionsEnum(field);
    }

    if (reuse != postingsEnum)
    {
        SetOther(postingsEnum, reuse); // postingsEnum.other = reuse
    }
    return postingsEnum.reset(liveDocs, pulsingState);
}
/// <summary>
/// Flushes the pending terms as one on-disk block. Four passes over the
/// pending terms, each appended as a length-prefixed byte blob:
/// (1) compute the common prefix shared by all terms (vs. the term just
/// before this block), (2) write the term suffixes, (3) write doc freqs
/// (and totalTermFreq deltas when freqs are indexed), (4) write per-term
/// metadata encoded by the postings writer. Pass order is part of the file
/// format and must not change.
/// </summary>
private void FlushBlock()
{
    // First pass: compute common prefix for all terms
    // in the block, against term before first term in
    // this block:
    int commonPrefix = SharedPrefix(lastPrevTerm, pendingTerms[0].Term);
    for (int termCount = 1; termCount < pendingCount; termCount++)
    {
        commonPrefix = Math.Min(commonPrefix, SharedPrefix(lastPrevTerm, pendingTerms[termCount].Term));
    }
    outerInstance.m_output.WriteVInt32(pendingCount);
    outerInstance.m_output.WriteVInt32(commonPrefix);

    // 2nd pass: write suffixes, as separate byte[] blob
    for (int termCount = 0; termCount < pendingCount; termCount++)
    {
        int suffix = pendingTerms[termCount].Term.Length - commonPrefix;
        // TODO: cutover to better intblock codec, instead
        // of interleaving here:
        bytesWriter.WriteVInt32(suffix);
        bytesWriter.WriteBytes(pendingTerms[termCount].Term.Bytes, commonPrefix, suffix);
    }
    outerInstance.m_output.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    bytesWriter.WriteTo(outerInstance.m_output);
    bytesWriter.Reset();

    // 3rd pass: write the freqs as byte[] blob
    // TODO: cutover to better intblock codec. simple64?
    for (int termCount = 0; termCount < pendingCount; termCount++)
    {
        BlockTermState state = pendingTerms[termCount].State;
        if (Debugging.AssertsEnabled) { Debugging.Assert(state != null); }
        bytesWriter.WriteVInt32(state.DocFreq);
        if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
        {
            // totalTermFreq >= docFreq always, so store the (smaller) delta.
            bytesWriter.WriteVInt64(state.TotalTermFreq - state.DocFreq);
        }
    }
    outerInstance.m_output.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    bytesWriter.WriteTo(outerInstance.m_output);
    bytesWriter.Reset();

    // 4th pass: write the metadata. The first term is encoded with absolute
    // values; subsequent terms are deltas against the previous term.
    long[] longs = new long[longsSize];
    bool absolute = true;
    for (int termCount = 0; termCount < pendingCount; termCount++)
    {
        BlockTermState state = pendingTerms[termCount].State;
        postingsWriter.EncodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
        for (int i = 0; i < longsSize; i++)
        {
            bytesWriter.WriteVInt64(longs[i]);
        }
        bufferWriter.WriteTo(bytesWriter);
        bufferWriter.Reset();
        absolute = false;
    }
    outerInstance.m_output.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    bytesWriter.WriteTo(outerInstance.m_output);
    bytesWriter.Reset();

    // Remember the last term so the next block can compute its common prefix.
    lastPrevTerm.CopyBytes(pendingTerms[pendingCount - 1].Term);
    pendingCount = 0;
}
/// <summary>
/// Returns a positions enum for the term. Fields with payloads or offsets need
/// the "full" positions enum (which decodes that extra per-position data);
/// otherwise the lighter docs-and-positions enum is used. A passed-in enum is
/// recycled only when it is the right type AND was created against the same
/// freq stream (a reused enum may come from another reader, e.g. via ParallelReader).
/// </summary>
public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
{
    bool hasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

    // TODO: can we optimize if FLAG_PAYLOADS / FLAG_OFFSETS
    // isn't passed?
    // TODO: refactor
    if (fieldInfo.HasPayloads() || hasOffsets)
    {
        // `as` yields null for a null or wrong-typed reuse, collapsing the
        // original's two checks into one condition.
        SegmentFullPositionsEnum fullEnum = reuse as SegmentFullPositionsEnum;
        if (fullEnum == null || fullEnum.StartFreqIn != FreqIn)
        {
            fullEnum = new SegmentFullPositionsEnum(this, FreqIn, ProxIn);
        }
        return fullEnum.Reset(fieldInfo, (StandardTermState)termState, liveDocs);
    }

    SegmentDocsAndPositionsEnum posEnum = reuse as SegmentDocsAndPositionsEnum;
    if (posEnum == null || posEnum.StartFreqIn != FreqIn)
    {
        posEnum = new SegmentDocsAndPositionsEnum(this, FreqIn, ProxIn);
    }
    return posEnum.Reset(fieldInfo, (StandardTermState)termState, liveDocs);
}
/// <summary>
/// Encodes one term's metadata as byte output: freq-file pointer delta,
/// optional skip offset, and (when positions are indexed) prox-file pointer
/// delta. Deltas are relative to the previously encoded term unless
/// <paramref name="absolute"/> resets the baseline to the empty state.
/// Write order must mirror the reader's decode order.
/// </summary>
public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
{
    StandardTermState state = (StandardTermState)_state;
    if (absolute)
    {
        // First term in block: deltas are taken against the zeroed empty state.
        LastState = EmptyState;
    }
    @out.WriteVLong(state.FreqStart - LastState.FreqStart);
    if (state.SkipOffset != -1)
    {
        // SkipOffset == -1 means no skip data was written for this term.
        Debug.Assert(state.SkipOffset > 0);
        @out.WriteVLong(state.SkipOffset);
    }
    if (IndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        @out.WriteVLong(state.ProxStart - LastState.ProxStart);
    }
    // Remember this term as the delta baseline for the next one.
    LastState = state;
}
/// <summary>
/// Decodes one term's metadata: freq-file offset delta, optional skip offset
/// (present only when docFreq >= skipMinimum), and prox-file offset delta when
/// positions are indexed. <paramref name="absolute"/> resets the running
/// offsets for the first term of a block. Read order must mirror the writer's
/// encode order.
/// </summary>
public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState termState, bool absolute)
{
    StandardTermState termState2 = (StandardTermState)termState;
    //bool isFirstTerm = termState2.TermBlockOrd == 0; // LUCENENET: IDE0059: Remove unnecessary value assignment
    if (absolute)
    {
        termState2.freqOffset = 0;
        termState2.proxOffset = 0;
    }
    termState2.freqOffset += @in.ReadVInt64();
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termState2.freqOffset < freqIn.Length);
    }
    if (termState2.DocFreq >= skipMinimum)
    {
        // Skip data was written for this term; its offset is relative to freqOffset.
        termState2.skipOffset = @in.ReadVInt64();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(termState2.freqOffset + termState2.skipOffset < freqIn.Length);
        }
    }
    else
    {
        // undefined -- skipOffset is meaningless when no skip data exists.
    }
    // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
    if (IndexOptionsComparer.Default.Compare(fieldInfo.IndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
    {
        termState2.proxOffset += @in.ReadVInt64();
    }
}
/// <summary>
/// Returns a positions enum for the term. A passed-in enum is recycled only
/// when it is a <c>SepDocsAndPositionsEnum</c> created against the same doc
/// stream; otherwise (null, wrong type, or an enum from another reader, e.g.
/// via ParallelReader) a fresh enum is created.
/// </summary>
public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
{
    // This reader only serves positions when the field indexed them.
    Debug.Assert(fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // `as` yields null for a null or wrong-typed reuse, collapsing the
    // original's type test and cast into one step.
    var posEnum = reuse as SepDocsAndPositionsEnum;
    if (posEnum == null || posEnum.START_DOC_IN != _docIn)
    {
        posEnum = new SepDocsAndPositionsEnum(this);
    }
    return posEnum.Init(fieldInfo, (SepTermState)bTermState, liveDocs);
}
/// <summary>
/// Returns a docs enum for the term, recycling <paramref name="reuse"/> when
/// it is compatible with the given live-docs bits, otherwise creating a new one.
/// </summary>
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags)
{
    var standardState = (StandardTermState)termState;
    return CanReuse(reuse, liveDocs)
        ? ((SegmentDocsEnumBase)reuse).Reset(fieldInfo, standardState)
        : NewDocsEnum(liveDocs, fieldInfo, standardState);
}
/// <summary>
/// Decodes one term's metadata for the Lucene41 format: file-pointer deltas
/// come from <paramref name="longs"/>, optional values (singleton docID, last
/// position block offset, skip offset) from the byte metadata. Pre-metadata-array
/// index versions fall back to the legacy byte-stream decoder. Read order must
/// mirror the writer's encode order.
/// </summary>
public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState _termState, bool absolute)
{
    Lucene41PostingsWriter.IntBlockTermState termState = (Lucene41PostingsWriter.IntBlockTermState)_termState;
    bool fieldHasPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    bool fieldHasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    bool fieldHasPayloads = fieldInfo.HasPayloads();
    if (absolute)
    {
        // First term in block: running file pointers restart from zero.
        termState.DocStartFP = 0;
        termState.PosStartFP = 0;
        termState.PayStartFP = 0;
    }
    if (Version < Lucene41PostingsWriter.VERSION_META_ARRAY) // backward compatibility
    {
        _decodeTerm(@in, fieldInfo, termState);
        return;
    }
    // Accumulate file-pointer deltas carried in the metadata longs.
    termState.DocStartFP += longs[0];
    if (fieldHasPositions)
    {
        termState.PosStartFP += longs[1];
        if (fieldHasOffsets || fieldHasPayloads)
        {
            termState.PayStartFP += longs[2];
        }
    }
    if (termState.DocFreq == 1)
    {
        // docFreq == 1: the single docID was pulsed into the terms dict.
        termState.SingletonDocID = @in.ReadVInt();
    }
    else
    {
        termState.SingletonDocID = -1;
    }
    if (fieldHasPositions)
    {
        // Only terms spanning more than one position block store this offset.
        if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
        {
            termState.LastPosBlockOffset = @in.ReadVLong();
        }
        else
        {
            termState.LastPosBlockOffset = -1;
        }
    }
    // Skip data exists only for terms spanning more than one doc block.
    if (termState.DocFreq > Lucene41PostingsFormat.BLOCK_SIZE)
    {
        termState.SkipOffset = @in.ReadVLong();
    }
    else
    {
        termState.SkipOffset = -1;
    }
}
/// <summary>
/// Encodes one term's metadata as byte output: freq-file pointer delta,
/// optional skip offset, and (when positions are indexed) prox-file pointer
/// delta. Deltas are relative to the previously encoded term unless
/// <paramref name="absolute"/> resets the baseline to the empty state.
/// Write order must mirror the reader's decode order.
/// </summary>
public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute)
{
    StandardTermState state_ = (StandardTermState)state;
    if (absolute)
    {
        // First term in block: deltas are taken against the zeroed empty state.
        lastState = emptyState;
    }
    @out.WriteVInt64(state_.FreqStart - lastState.FreqStart);
    if (state_.SkipOffset != -1)
    {
        // SkipOffset == -1 means no skip data was written for this term.
        Debug.Assert(state_.SkipOffset > 0);
        @out.WriteVInt64(state_.SkipOffset);
    }
    if (indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
    {
        @out.WriteVInt64(state_.ProxStart - lastState.ProxStart);
    }
    // Remember this term as the delta baseline for the next one.
    lastState = state_;
}
/// <summary>
/// Decodes one term's metadata: freq-file offset delta, optional skip offset
/// (present only when docFreq >= skipMinimum), and prox-file offset delta when
/// positions are indexed. <paramref name="absolute"/> resets the running
/// offsets for the first term of a block. Read order must mirror the writer's
/// encode order.
/// </summary>
public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState termState, bool absolute)
{
    StandardTermState termState2 = (StandardTermState)termState;
    // Removed unused local `isFirstTerm` (was `termState2.TermBlockOrd == 0`)
    // -- never read (CS0219); matches the cleanup in the sibling decoder.
    if (absolute)
    {
        termState2.freqOffset = 0;
        termState2.proxOffset = 0;
    }
    termState2.freqOffset += @in.ReadVInt64();
    Debug.Assert(termState2.freqOffset < freqIn.Length);
    if (termState2.DocFreq >= skipMinimum)
    {
        // Skip data was written for this term; its offset is relative to freqOffset.
        termState2.skipOffset = @in.ReadVInt64();
        Debug.Assert(termState2.freqOffset + termState2.skipOffset < freqIn.Length);
    }
    else
    {
        // undefined -- skipOffset is meaningless when no skip data exists.
    }
    if (fieldInfo.IndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
    {
        termState2.proxOffset += @in.ReadVInt64();
    }
}
/// <summary>
/// Decodes one term's metadata for the Sep format: doc index, then (per index
/// options) freq and position indexes plus the payload file pointer, then the
/// skip file pointer when docFreq >= skipMinimum. Pointers are absolute for the
/// first term of a block and deltas afterwards. Read order must mirror the
/// writer's encode order.
/// </summary>
public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState bTermState, bool absolute)
{
    var sepState = (SepTermState)bTermState;

    sepState.DOC_INDEX.Read(input, absolute);
    if (fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
    {
        sepState.FREQ_INDEX.Read(input, absolute);
        if (fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            sepState.POS_INDEX.Read(input, absolute);
            if (fieldInfo.HasPayloads())
            {
                // Absolute: pointer as-is; otherwise accumulate the delta.
                sepState.PAYLOAD_FP = absolute ? input.ReadVLong() : sepState.PAYLOAD_FP + input.ReadVLong();
            }
        }
    }

    if (sepState.DocFreq >= _skipMinimum)
    {
        sepState.SKIP_FP = absolute ? input.ReadVLong() : sepState.SKIP_FP + input.ReadVLong();
    }
    else if (absolute)
    {
        // No skip data for this term; reset the running pointer on block start.
        sepState.SKIP_FP = 0;
    }
}
/// <summary>
/// Called when we are done adding docs to this term. Flushes any remaining
/// (partial-block) doc/freq and position/payload/offset buffers as vInt-encoded
/// tails, writes skip data when the term spans more than one doc block, records
/// all file pointers into the term state, and resets per-term counters.
/// </summary>
public override void FinishTerm(BlockTermState state)
{
    Int32BlockTermState state2 = (Int32BlockTermState)state;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state2.DocFreq > 0);
    }

    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state2.DocFreq == docCount, "{0} vs {1}", state2.DocFreq, docCount);
    }

    // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
    int singletonDocID;
    if (state2.DocFreq == 1)
    {
        // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
        singletonDocID = docDeltaBuffer[0];
    }
    else
    {
        singletonDocID = -1;
        // vInt encode the remaining doc deltas and freqs:
        // freq==1 is folded into the low bit of the delta to save a vInt.
        for (int i = 0; i < docBufferUpto; i++)
        {
            int docDelta = docDeltaBuffer[i];
            int freq = freqBuffer[i];
            if (!fieldHasFreqs)
            {
                docOut.WriteVInt32(docDelta);
            }
            else if (freqBuffer[i] == 1)
            {
                docOut.WriteVInt32((docDelta << 1) | 1);
            }
            else
            {
                docOut.WriteVInt32(docDelta << 1);
                docOut.WriteVInt32(freq);
            }
        }
    }

    long lastPosBlockOffset;
    if (fieldHasPositions)
    {
        // totalTermFreq is just total number of positions(or payloads, or offsets)
        // associated with current term.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(state2.TotalTermFreq != -1);
        }
        if (state2.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
        {
            // record file offset for last pos in last block
            lastPosBlockOffset = posOut.Position - posStartFP; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }
        else
        {
            lastPosBlockOffset = -1;
        }
        if (posBufferUpto > 0)
        {
            // TODO: should we send offsets/payloads to
            // .pay...?  seems wasteful (have to store extra
            // vLong for low (< BLOCK_SIZE) DF terms = vast vast
            // majority)

            // vInt encode the remaining positions/payloads/offsets:
            // payload/offset lengths are run-length encoded -- the low bit of
            // the delta flags "length changed, new length follows".
            int lastPayloadLength = -1; // force first payload length to be written
            int lastOffsetLength = -1; // force first offset length to be written
            int payloadBytesReadUpto = 0;
            for (int i = 0; i < posBufferUpto; i++)
            {
                int posDelta = posDeltaBuffer[i];
                if (fieldHasPayloads)
                {
                    int payloadLength = payloadLengthBuffer[i];
                    if (payloadLength != lastPayloadLength)
                    {
                        lastPayloadLength = payloadLength;
                        posOut.WriteVInt32((posDelta << 1) | 1);
                        posOut.WriteVInt32(payloadLength);
                    }
                    else
                    {
                        posOut.WriteVInt32(posDelta << 1);
                    }
                    if (payloadLength != 0)
                    {
                        posOut.WriteBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
                        payloadBytesReadUpto += payloadLength;
                    }
                }
                else
                {
                    posOut.WriteVInt32(posDelta);
                }
                if (fieldHasOffsets)
                {
                    int delta = offsetStartDeltaBuffer[i];
                    int length = offsetLengthBuffer[i];
                    if (length == lastOffsetLength)
                    {
                        posOut.WriteVInt32(delta << 1);
                    }
                    else
                    {
                        posOut.WriteVInt32(delta << 1 | 1);
                        posOut.WriteVInt32(length);
                        lastOffsetLength = length;
                    }
                }
            }
            if (fieldHasPayloads)
            {
                // All buffered payload bytes must have been consumed above.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(payloadBytesReadUpto == payloadByteUpto);
                }
                payloadByteUpto = 0;
            }
        }
    }
    else
    {
        lastPosBlockOffset = -1;
    }

    long skipOffset;
    if (docCount > Lucene41PostingsFormat.BLOCK_SIZE)
    {
        // Term spans multiple doc blocks: flush skip data, offset relative to docStartFP.
        skipOffset = skipWriter.WriteSkip(docOut) - docStartFP;
    }
    else
    {
        skipOffset = -1;
    }

    // Publish all file pointers / offsets into the term state for EncodeTerm.
    state2.docStartFP = docStartFP;
    state2.posStartFP = posStartFP;
    state2.payStartFP = payStartFP;
    state2.singletonDocID = singletonDocID;
    state2.skipOffset = skipOffset;
    state2.lastPosBlockOffset = lastPosBlockOffset;

    // Reset per-term accumulators for the next term.
    docBufferUpto = 0;
    posBufferUpto = 0;
    lastDocID = 0;
    docCount = 0;
}
/// <summary>
/// Decodes one term's metadata for the Sep format: doc index, then (per index
/// options) freq and position indexes plus the payload file pointer, then the
/// skip file pointer when docFreq >= skipMinimum. Pointers are absolute for
/// the first term of a block and deltas afterwards. Read order must mirror the
/// writer's encode order.
/// </summary>
public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState termState, bool absolute)
{
    SepTermState termState_ = (SepTermState)termState;
    termState_.docIndex.Read(input, absolute);
    if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
    {
        termState_.freqIndex.Read(input, absolute);
        if (fieldInfo.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            termState_.posIndex.Read(input, absolute);
            if (fieldInfo.HasPayloads)
            {
                // Absolute: pointer as-is; otherwise accumulate the delta.
                if (absolute)
                {
                    termState_.payloadFP = input.ReadVInt64();
                }
                else
                {
                    termState_.payloadFP += input.ReadVInt64();
                }
            }
        }
    }
    if (termState_.DocFreq >= skipMinimum)
    {
        if (absolute)
        {
            termState_.skipFP = input.ReadVInt64();
        }
        else
        {
            termState_.skipFP += input.ReadVInt64();
        }
    }
    else if (absolute)
    {
        // No skip data for this term; reset the running pointer on block start.
        termState_.skipFP = 0;
    }
}
/// <summary>
/// Encodes one term's metadata as byte output: freq-file pointer delta,
/// optional skip offset, and (when positions are indexed) prox-file pointer
/// delta. Deltas are relative to the previously encoded term unless
/// <paramref name="absolute"/> resets the baseline to the empty state.
/// Write order must mirror the reader's decode order.
/// </summary>
public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute)
{
    var termState = (StandardTermState)state;

    // First term in a block is encoded against the zeroed empty state.
    if (absolute)
        lastState = emptyState;

    @out.WriteVInt64(termState.FreqStart - lastState.FreqStart);

    // SkipOffset == -1 means no skip data was written for this term.
    if (termState.SkipOffset != -1)
    {
        if (Debugging.AssertsEnabled)
            Debugging.Assert(termState.SkipOffset > 0);
        @out.WriteVInt64(termState.SkipOffset);
    }

    // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
    if (IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
        @out.WriteVInt64(termState.ProxStart - lastState.ProxStart);

    // This term becomes the delta baseline for the next one.
    lastState = termState;
}
/// <summary>
/// Encodes one term's metadata for the Pulsing format. An inlined ("pulsed")
/// term writes its raw postings blob directly; otherwise the wrapped writer's
/// metadata longs and bytes are forwarded to the output. The absolute flag is
/// sticky until the wrapped writer consumes it.
/// </summary>
public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState state, bool abs)
{
    var pulsingState = (PulsingTermState)state;
    Debug.Assert(empty.Length == 0);
    _absolute = _absolute || abs;

    if (pulsingState.bytes != null)
    {
        // Inlined term: length-prefixed raw postings blob.
        output.WriteVInt32(pulsingState.bytes.Length);
        output.WriteBytes(pulsingState.bytes, 0, pulsingState.bytes.Length);
        _absolute = _absolute || abs;
    }
    else
    {
        // Delegated term: forward the wrapped writer's longs then its bytes.
        _wrappedPostingsWriter.EncodeTerm(_longs, _buffer, fieldInfo, pulsingState.wrappedState, _absolute);
        for (int i = 0; i < _longsSize; i++)
        {
            output.WriteVInt64(_longs[i]);
        }
        _buffer.WriteTo(output);
        _buffer.Reset();
        _absolute = false;
    }
}
/// <summary>
/// Decodes one term's metadata for the Sep format: doc index, then (per index
/// options) freq and position indexes plus the payload file pointer, then the
/// skip file pointer when docFreq >= skipMinimum. Pointers are absolute for
/// the first term of a block and deltas afterwards. Read order must mirror the
/// writer's encode order.
/// </summary>
public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState bTermState, bool absolute)
{
    var state_ = (SepTermState)bTermState;

    state_.DOC_INDEX.Read(input, absolute);

    if (fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
    {
        state_.FREQ_INDEX.Read(input, absolute);

        if (fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            state_.POS_INDEX.Read(input, absolute);

            if (fieldInfo.HasPayloads())
            {
                // Absolute: pointer as-is; otherwise accumulate the delta.
                if (!absolute)
                {
                    state_.PAYLOAD_FP += input.ReadVLong();
                }
                else
                {
                    state_.PAYLOAD_FP = input.ReadVLong();
                }
            }
        }
    }

    if (state_.DocFreq >= _skipMinimum)
    {
        if (!absolute)
        {
            state_.SKIP_FP += input.ReadVLong();
        }
        else
        {
            state_.SKIP_FP = input.ReadVLong();
        }
    }
    else if (absolute)
    {
        // No skip data for this term; reset the running pointer on block start.
        state_.SKIP_FP = 0;
    }
}