Exemple #1
0
        /// <summary>
        /// Called when we are done adding docs to this term. Writes whatever is still
        /// held in the doc/freq and position buffers as vInts, records the resulting
        /// file pointers and offsets on the term state, and resets the per-term counters.
        /// </summary>
        /// <param name="_state">Term state to fill in; it is cast to <see cref="IntBlockTermState"/>.</param>
        public override void FinishTerm(BlockTermState _state)
        {
            var termState = (IntBlockTermState)_state;

            Debug.Assert(termState.DocFreq > 0);

            // TODO: wasteful we are counting this (counting # docs
            // for this term) in two places?
            Debug.Assert(termState.DocFreq == DocCount, termState.DocFreq + " vs " + DocCount);

            // -1 marks "no singleton". When the term has exactly one doc, its id is
            // handed to the term state instead of being written to the doc output;
            // the freq is implicitly totalTermFreq in that case.
            int singletonDocID = -1;

            if (termState.DocFreq == 1)
            {
                singletonDocID = DocDeltaBuffer[0];
            }
            else
            {
                // vInt encode the doc deltas and freqs that are still buffered.
                for (int slot = 0; slot < DocBufferUpto; slot++)
                {
                    int delta    = DocDeltaBuffer[slot];
                    int termFreq = FreqBuffer[slot];

                    if (FieldHasFreqs)
                    {
                        if (termFreq == 1)
                        {
                            // Low bit set: freq is 1 and is not written separately.
                            DocOut.WriteVInt((delta << 1) | 1);
                        }
                        else
                        {
                            // Low bit clear: the freq follows as its own vInt.
                            DocOut.WriteVInt(delta << 1);
                            DocOut.WriteVInt(termFreq);
                        }
                    }
                    else
                    {
                        DocOut.WriteVInt(delta);
                    }
                }
            }

            // Stays -1 unless the field has positions and totalTermFreq exceeds BLOCK_SIZE.
            long lastPosBlockOffset = -1;

            if (FieldHasPositions)
            {
                // totalTermFreq is just total number of positions (or payloads, or offsets)
                // associated with current term.
                Debug.Assert(termState.TotalTermFreq != -1);

                if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
                {
                    // Must be captured before the trailing positions below are written.
                    lastPosBlockOffset = PosOut.FilePointer - PosStartFP;
                }

                if (PosBufferUpto > 0)
                {
                    // TODO: should we send offsets/payloads to
                    // .pay...?  seems wasteful (have to store extra
                    // vLong for low (< BLOCK_SIZE) DF terms = vast vast
                    // majority)

                    // vInt encode the buffered positions/payloads/offsets. Starting the
                    // "previous" lengths at -1 forces the first length to be written.
                    int prevPayloadLength = -1;
                    int prevOffsetLength  = -1;
                    int payloadCursor     = 0;

                    for (int slot = 0; slot < PosBufferUpto; slot++)
                    {
                        int posDelta = PosDeltaBuffer[slot];

                        if (!FieldHasPayloads)
                        {
                            PosOut.WriteVInt(posDelta);
                        }
                        else
                        {
                            int payloadLength = PayloadLengthBuffer[slot];

                            if (payloadLength == prevPayloadLength)
                            {
                                // Low bit clear: payload length is unchanged.
                                PosOut.WriteVInt(posDelta << 1);
                            }
                            else
                            {
                                // Low bit set: a new payload length follows.
                                prevPayloadLength = payloadLength;
                                PosOut.WriteVInt((posDelta << 1) | 1);
                                PosOut.WriteVInt(payloadLength);
                            }

                            if (payloadLength != 0)
                            {
                                PosOut.WriteBytes(PayloadBytes, payloadCursor, payloadLength);
                                payloadCursor += payloadLength;
                            }
                        }

                        if (FieldHasOffsets)
                        {
                            int startDelta   = OffsetStartDeltaBuffer[slot];
                            int offsetLength = OffsetLengthBuffer[slot];

                            if (offsetLength != prevOffsetLength)
                            {
                                // Low bit set: a new offset length follows.
                                PosOut.WriteVInt((startDelta << 1) | 1);
                                PosOut.WriteVInt(offsetLength);
                                prevOffsetLength = offsetLength;
                            }
                            else
                            {
                                PosOut.WriteVInt(startDelta << 1);
                            }
                        }
                    }

                    if (FieldHasPayloads)
                    {
                        // Every buffered payload byte must have been consumed.
                        Debug.Assert(payloadCursor == PayloadByteUpto);
                        PayloadByteUpto = 0;
                    }
                }
            }

            // Skip data is only written once the term has more than BLOCK_SIZE docs.
            long skipOffset = DocCount > Lucene41PostingsFormat.BLOCK_SIZE
                ? SkipWriter.WriteSkip(DocOut) - DocStartFP
                : -1L;

            // Publish everything the caller needs on the term state.
            termState.DocStartFP         = DocStartFP;
            termState.PosStartFP         = PosStartFP;
            termState.PayStartFP         = PayStartFP;
            termState.SingletonDocID     = singletonDocID;
            termState.SkipOffset         = skipOffset;
            termState.LastPosBlockOffset = lastPosBlockOffset;

            // Reset per-term bookkeeping.
            DocBufferUpto = 0;
            PosBufferUpto = 0;
            LastDocID     = 0;
            DocCount      = 0;
        }
        /// <summary>
        /// Called when we are done adding docs to this term. Writes whatever is still
        /// held in the doc/freq and position buffers as vInts, records the resulting
        /// file pointers and offsets on the term state, and resets the per-term counters.
        /// </summary>
        /// <param name="state">Term state to fill in; it is cast to <see cref="Int32BlockTermState"/>.</param>
        public override void FinishTerm(BlockTermState state)
        {
            var termState = (Int32BlockTermState)state;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termState.DocFreq > 0);

                // TODO: wasteful we are counting this (counting # docs
                // for this term) in two places?
                Debugging.Assert(termState.DocFreq == docCount, "{0} vs {1}", termState.DocFreq, docCount);
            }

            // -1 marks "no singleton". When the term has exactly one doc, its id is
            // handed to the term state instead of being written to the doc output;
            // the freq is implicitly totalTermFreq in that case.
            int singletonDocID = -1;

            if (termState.DocFreq == 1)
            {
                singletonDocID = docDeltaBuffer[0];
            }
            else
            {
                // vInt encode the doc deltas and freqs that are still buffered.
                for (int slot = 0; slot < docBufferUpto; slot++)
                {
                    int delta    = docDeltaBuffer[slot];
                    int termFreq = freqBuffer[slot];

                    if (fieldHasFreqs)
                    {
                        if (termFreq == 1)
                        {
                            // Low bit set: freq is 1 and is not written separately.
                            docOut.WriteVInt32((delta << 1) | 1);
                        }
                        else
                        {
                            // Low bit clear: the freq follows as its own vInt.
                            docOut.WriteVInt32(delta << 1);
                            docOut.WriteVInt32(termFreq);
                        }
                    }
                    else
                    {
                        docOut.WriteVInt32(delta);
                    }
                }
            }

            // Stays -1 unless the field has positions and totalTermFreq exceeds BLOCK_SIZE.
            long lastPosBlockOffset = -1;

            if (fieldHasPositions)
            {
                // totalTermFreq is just total number of positions (or payloads, or offsets)
                // associated with current term.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termState.TotalTermFreq != -1);
                }

                if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
                {
                    // Must be captured before the trailing positions below are written.
                    // LUCENENET specific: Position was renamed from getFilePointer() to match FileStream.
                    lastPosBlockOffset = posOut.Position - posStartFP;
                }

                if (posBufferUpto > 0)
                {
                    // TODO: should we send offsets/payloads to
                    // .pay...?  seems wasteful (have to store extra
                    // vLong for low (< BLOCK_SIZE) DF terms = vast vast
                    // majority)

                    // vInt encode the buffered positions/payloads/offsets. Starting the
                    // "previous" lengths at -1 forces the first length to be written.
                    int prevPayloadLength = -1;
                    int prevOffsetLength  = -1;
                    int payloadCursor     = 0;

                    for (int slot = 0; slot < posBufferUpto; slot++)
                    {
                        int posDelta = posDeltaBuffer[slot];

                        if (!fieldHasPayloads)
                        {
                            posOut.WriteVInt32(posDelta);
                        }
                        else
                        {
                            int payloadLength = payloadLengthBuffer[slot];

                            if (payloadLength == prevPayloadLength)
                            {
                                // Low bit clear: payload length is unchanged.
                                posOut.WriteVInt32(posDelta << 1);
                            }
                            else
                            {
                                // Low bit set: a new payload length follows.
                                prevPayloadLength = payloadLength;
                                posOut.WriteVInt32((posDelta << 1) | 1);
                                posOut.WriteVInt32(payloadLength);
                            }

                            if (payloadLength != 0)
                            {
                                posOut.WriteBytes(payloadBytes, payloadCursor, payloadLength);
                                payloadCursor += payloadLength;
                            }
                        }

                        if (fieldHasOffsets)
                        {
                            int startDelta   = offsetStartDeltaBuffer[slot];
                            int offsetLength = offsetLengthBuffer[slot];

                            if (offsetLength != prevOffsetLength)
                            {
                                // Low bit set: a new offset length follows.
                                posOut.WriteVInt32((startDelta << 1) | 1);
                                posOut.WriteVInt32(offsetLength);
                                prevOffsetLength = offsetLength;
                            }
                            else
                            {
                                posOut.WriteVInt32(startDelta << 1);
                            }
                        }
                    }

                    if (fieldHasPayloads)
                    {
                        // Every buffered payload byte must have been consumed.
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(payloadCursor == payloadByteUpto);
                        }
                        payloadByteUpto = 0;
                    }
                }
            }

            // Skip data is only written once the term has more than BLOCK_SIZE docs.
            long skipOffset = docCount > Lucene41PostingsFormat.BLOCK_SIZE
                ? skipWriter.WriteSkip(docOut) - docStartFP
                : -1L;

            // Publish everything the caller needs on the term state.
            termState.docStartFP         = docStartFP;
            termState.posStartFP         = posStartFP;
            termState.payStartFP         = payStartFP;
            termState.singletonDocID     = singletonDocID;
            termState.skipOffset         = skipOffset;
            termState.lastPosBlockOffset = lastPosBlockOffset;

            // Reset per-term bookkeeping.
            docBufferUpto = 0;
            posBufferUpto = 0;
            lastDocID     = 0;
            docCount      = 0;
        }