/// <summary>
/// Called when we are done adding docs to this term.
/// <para/>
/// Writes whatever is still sitting in the doc/freq and position buffers as
/// vInts, emits skip data when the term has more than
/// <c>Lucene41PostingsFormat.BLOCK_SIZE</c> docs, copies the resulting
/// pointers into the term state and resets the per-term counters.
/// </summary>
/// <param name="_state">Term state to fill in; it is cast to <c>IntBlockTermState</c>.</param>
public override void FinishTerm(BlockTermState _state)
{
    IntBlockTermState termState = (IntBlockTermState)_state;
    Debug.Assert(termState.DocFreq > 0);

    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    Debug.Assert(termState.DocFreq == DocCount, termState.DocFreq + " vs " + DocCount);

    // docFreq == 1: don't write the single docid/freq to a separate file along
    // with a pointer to it. The singleton docid is pulsed into the term
    // dictionary instead; freq is implicitly totalTermFreq.
    int singletonDocID = -1;
    if (termState.DocFreq == 1)
    {
        singletonDocID = DocDeltaBuffer[0];
    }
    else
    {
        // vInt encode the remaining doc deltas and freqs.
        for (int slot = 0; slot < DocBufferUpto; slot++)
        {
            int delta = DocDeltaBuffer[slot];
            int termFreq = FreqBuffer[slot];
            if (FieldHasFreqs)
            {
                if (termFreq == 1)
                {
                    // Low bit set: freq == 1, so no separate freq value follows.
                    DocOut.WriteVInt((delta << 1) | 1);
                }
                else
                {
                    // Low bit clear: the freq is written right after the delta.
                    DocOut.WriteVInt(delta << 1);
                    DocOut.WriteVInt(termFreq);
                }
            }
            else
            {
                DocOut.WriteVInt(delta);
            }
        }
    }

    long lastPosBlockOffset = -1;
    if (FieldHasPositions)
    {
        // totalTermFreq is just total number of positions (or payloads, or offsets)
        // associated with current term.
        Debug.Assert(termState.TotalTermFreq != -1);
        if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
        {
            // Record file offset for last pos in last block. This must be read
            // before the buffered positions below are written.
            lastPosBlockOffset = PosOut.FilePointer - PosStartFP;
        }

        if (PosBufferUpto > 0)
        {
            // TODO: should we send offsets/payloads to
            // .pay...? seems wasteful (have to store extra
            // vLong for low (< BLOCK_SIZE) DF terms = vast vast
            // majority)

            // vInt encode the remaining positions/payloads/offsets.
            int prevPayloadLength = -1; // force first payload length to be written
            int prevOffsetLength = -1;  // force first offset length to be written
            int payloadCursor = 0;
            for (int slot = 0; slot < PosBufferUpto; slot++)
            {
                int posDelta = PosDeltaBuffer[slot];
                if (!FieldHasPayloads)
                {
                    PosOut.WriteVInt(posDelta);
                }
                else
                {
                    int payloadLength = PayloadLengthBuffer[slot];
                    if (payloadLength == prevPayloadLength)
                    {
                        // Same length as the previous payload: low bit clear, length omitted.
                        PosOut.WriteVInt(posDelta << 1);
                    }
                    else
                    {
                        // Length changed: low bit set, followed by the new length.
                        prevPayloadLength = payloadLength;
                        PosOut.WriteVInt((posDelta << 1) | 1);
                        PosOut.WriteVInt(payloadLength);
                    }

                    if (payloadLength != 0)
                    {
                        PosOut.WriteBytes(PayloadBytes, payloadCursor, payloadLength);
                        payloadCursor += payloadLength;
                    }
                }

                if (FieldHasOffsets)
                {
                    int startDelta = OffsetStartDeltaBuffer[slot];
                    int offsetLength = OffsetLengthBuffer[slot];
                    if (offsetLength != prevOffsetLength)
                    {
                        // Length changed: low bit set, followed by the new length.
                        PosOut.WriteVInt(startDelta << 1 | 1);
                        PosOut.WriteVInt(offsetLength);
                        prevOffsetLength = offsetLength;
                    }
                    else
                    {
                        PosOut.WriteVInt(startDelta << 1);
                    }
                }
            }

            if (FieldHasPayloads)
            {
                // Every buffered payload byte must have been consumed by the loop above.
                Debug.Assert(payloadCursor == PayloadByteUpto);
                PayloadByteUpto = 0;
            }
        }
    }

    // Skip data is only written for terms with more than BLOCK_SIZE docs.
    long skipOffset = DocCount > Lucene41PostingsFormat.BLOCK_SIZE
        ? SkipWriter.WriteSkip(DocOut) - DocStartFP
        : -1;

    // Publish everything recorded for this term on the term state.
    termState.DocStartFP = DocStartFP;
    termState.PosStartFP = PosStartFP;
    termState.PayStartFP = PayStartFP;
    termState.SingletonDocID = singletonDocID;
    termState.SkipOffset = skipOffset;
    termState.LastPosBlockOffset = lastPosBlockOffset;

    // Reset the per-term counters.
    DocBufferUpto = 0;
    PosBufferUpto = 0;
    LastDocID = 0;
    DocCount = 0;
}
/// <summary>
/// Called when we are done adding docs to this term.
/// <para/>
/// Writes whatever is still sitting in the doc/freq and position buffers as
/// vInts, emits skip data when the term has more than
/// <c>Lucene41PostingsFormat.BLOCK_SIZE</c> docs, copies the resulting
/// pointers into the term state and resets the per-term counters.
/// </summary>
/// <param name="state">Term state to fill in; it is cast to <c>Int32BlockTermState</c>.</param>
public override void FinishTerm(BlockTermState state)
{
    Int32BlockTermState termState = (Int32BlockTermState)state;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termState.DocFreq > 0);
    }

    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termState.DocFreq == docCount, "{0} vs {1}", termState.DocFreq, docCount);
    }

    // docFreq == 1: don't write the single docid/freq to a separate file along
    // with a pointer to it. The singleton docid is pulsed into the term
    // dictionary instead; freq is implicitly totalTermFreq.
    int singletonDocID = -1;
    if (termState.DocFreq == 1)
    {
        singletonDocID = docDeltaBuffer[0];
    }
    else
    {
        // vInt encode the remaining doc deltas and freqs.
        for (int slot = 0; slot < docBufferUpto; slot++)
        {
            int delta = docDeltaBuffer[slot];
            int termFreq = freqBuffer[slot];
            if (fieldHasFreqs)
            {
                if (termFreq == 1)
                {
                    // Low bit set: freq == 1, so no separate freq value follows.
                    docOut.WriteVInt32((delta << 1) | 1);
                }
                else
                {
                    // Low bit clear: the freq is written right after the delta.
                    docOut.WriteVInt32(delta << 1);
                    docOut.WriteVInt32(termFreq);
                }
            }
            else
            {
                docOut.WriteVInt32(delta);
            }
        }
    }

    long lastPosBlockOffset = -1;
    if (fieldHasPositions)
    {
        // totalTermFreq is just total number of positions (or payloads, or offsets)
        // associated with current term.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(termState.TotalTermFreq != -1);
        }

        if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
        {
            // Record file offset for last pos in last block. This must be read
            // before the buffered positions below are written.
            lastPosBlockOffset = posOut.Position - posStartFP; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }

        if (posBufferUpto > 0)
        {
            // TODO: should we send offsets/payloads to
            // .pay...? seems wasteful (have to store extra
            // vLong for low (< BLOCK_SIZE) DF terms = vast vast
            // majority)

            // vInt encode the remaining positions/payloads/offsets.
            int prevPayloadLength = -1; // force first payload length to be written
            int prevOffsetLength = -1;  // force first offset length to be written
            int payloadCursor = 0;
            for (int slot = 0; slot < posBufferUpto; slot++)
            {
                int posDelta = posDeltaBuffer[slot];
                if (!fieldHasPayloads)
                {
                    posOut.WriteVInt32(posDelta);
                }
                else
                {
                    int payloadLength = payloadLengthBuffer[slot];
                    if (payloadLength == prevPayloadLength)
                    {
                        // Same length as the previous payload: low bit clear, length omitted.
                        posOut.WriteVInt32(posDelta << 1);
                    }
                    else
                    {
                        // Length changed: low bit set, followed by the new length.
                        prevPayloadLength = payloadLength;
                        posOut.WriteVInt32((posDelta << 1) | 1);
                        posOut.WriteVInt32(payloadLength);
                    }

                    if (payloadLength != 0)
                    {
                        posOut.WriteBytes(payloadBytes, payloadCursor, payloadLength);
                        payloadCursor += payloadLength;
                    }
                }

                if (fieldHasOffsets)
                {
                    int startDelta = offsetStartDeltaBuffer[slot];
                    int offsetLength = offsetLengthBuffer[slot];
                    if (offsetLength != prevOffsetLength)
                    {
                        // Length changed: low bit set, followed by the new length.
                        posOut.WriteVInt32(startDelta << 1 | 1);
                        posOut.WriteVInt32(offsetLength);
                        prevOffsetLength = offsetLength;
                    }
                    else
                    {
                        posOut.WriteVInt32(startDelta << 1);
                    }
                }
            }

            if (fieldHasPayloads)
            {
                // Every buffered payload byte must have been consumed by the loop above.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(payloadCursor == payloadByteUpto);
                }
                payloadByteUpto = 0;
            }
        }
    }

    // Skip data is only written for terms with more than BLOCK_SIZE docs.
    long skipOffset = docCount > Lucene41PostingsFormat.BLOCK_SIZE
        ? skipWriter.WriteSkip(docOut) - docStartFP
        : -1;

    // Publish everything recorded for this term on the term state.
    termState.docStartFP = docStartFP;
    termState.posStartFP = posStartFP;
    termState.payStartFP = payStartFP;
    termState.singletonDocID = singletonDocID;
    termState.skipOffset = skipOffset;
    termState.lastPosBlockOffset = lastPosBlockOffset;

    // Reset the per-term counters.
    docBufferUpto = 0;
    posBufferUpto = 0;
    lastDocID = 0;
    docCount = 0;
}