Esempio n. 1
0
                /// <summary>
                /// Builds the enumerator for the given terms reader: obtains a fresh term state
                /// from the postings reader, resets the stats/metadata readers onto the reader's
                /// blocks and allocates the per-block buffers (each <c>INTERVAL</c> entries long).
                /// </summary>
                /// <param name="outerInstance">The terms reader this enumerator belongs to.</param>
                internal BaseTermsEnum(TermsReader outerInstance)
                {
                    this.outerInstance = outerInstance;
                    state = outerInstance.outerInstance.postingsReader.NewTermState();
                    term = null;

                    // Point the three block readers at the owning reader's data.
                    statsReader.Reset(outerInstance.statsBlock);
                    metaLongsReader.Reset(outerInstance.metaLongsBlock);
                    metaBytesReader.Reset(outerInstance.metaBytesBlock);

                    // Per-block buffers; longs is INTERVAL x longsSize.
                    longs = RectangularArrays.ReturnRectangularArray <long>(INTERVAL, outerInstance.longsSize);
                    bytesStart = new int[INTERVAL];
                    bytesLength = new int[INTERVAL];
                    docFreq_Renamed = new int[INTERVAL];
                    totalTermFreq_Renamed = new long[INTERVAL];

                    // -1 means no block has been loaded yet (presumably; confirm against the seek logic).
                    statsBlockOrd = -1;
                    metaBlockOrd = -1;

                    // Without freqs, every total-term-freq slot is pre-filled with -1.
                    if (!outerInstance.HasFreqs)
                    {
                        Arrays.Fill(totalTermFreq_Renamed, -1);
                    }
                }
Esempio n. 2
0
        /// <summary>
        /// Called when we are done adding docs to this term. Copies the freq/prox start
        /// pointers into the term state, writes skip data when the term has at least
        /// <c>SkipMinimum</c> documents, and resets the per-term counters.
        /// </summary>
        /// <param name="_state">Term state to fill in; cast to <c>StandardTermState</c>.</param>
        public override void FinishTerm(BlockTermState _state)
        {
            var termState = (StandardTermState)_state;

            Debug.Assert(termState.DocFreq > 0);
            // TODO: wasteful we are counting this (counting # docs
            // for this term) in two places?
            Debug.Assert(termState.DocFreq == Df);

            termState.FreqStart = FreqStart;
            termState.ProxStart = ProxStart;

            // Skip offset is stored relative to FreqStart; -1 when no skip data is written.
            termState.SkipOffset = Df >= SkipMinimum
                ? SkipListWriter.WriteSkip(FreqOut) - FreqStart
                : -1;

            // Reset per-term counters for the next term.
            Df = 0;
            LastDocID = 0;
        }
Esempio n. 3
0
        /// <summary>
        /// Returns a <c>SepDocsEnum</c> initialised for the given term, reusing
        /// <paramref name="reuse"/> when it is a <c>SepDocsEnum</c> whose
        /// <c>startDocIn</c> matches this reader's <c>docIn</c>.
        /// </summary>
        /// <param name="fieldInfo">Field the term belongs to.</param>
        /// <param name="termState">Term state; cast to <c>SepTermState</c>.</param>
        /// <param name="liveDocs">Live-docs bits passed through to the enum.</param>
        /// <param name="reuse">Candidate enum to recycle; may be null or of another type.</param>
        /// <param name="flags">Not consulted by this implementation.</param>
        public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse,
                                      DocsFlags flags)
        {
            SepTermState sepState = (SepTermState)termState;

            // A reused enum may have come from another reader that also uses the sep
            // codec (e.g. via a parallel reader); only recycle it when it was opened
            // on this reader's doc input.
            SepDocsEnum candidate = reuse as SepDocsEnum;
            if (candidate == null || candidate.startDocIn != docIn)
            {
                candidate = new SepDocsEnum(this);
            }

            return candidate.Init(fieldInfo, sepState, liveDocs);
        }
 /// <summary>
 /// Returns a <c>BlockDocsEnum</c> reset onto the given term. The supplied
 /// <paramref name="reuse"/> instance is recycled only when it is a
 /// <c>BlockDocsEnum</c> that reports it can be reused for <c>DocIn</c> and this field.
 /// </summary>
 public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags)
 {
     BlockDocsEnum blockEnum = reuse as BlockDocsEnum;

     // Wrong type, null, or not compatible with this reader/field: allocate a new enum.
     if (blockEnum == null || !blockEnum.CanReuse(DocIn, fieldInfo))
     {
         blockEnum = new BlockDocsEnum(this, fieldInfo);
     }

     return blockEnum.Reset(liveDocs, (Lucene41PostingsWriter.IntBlockTermState)termState, flags);
 }
        /// <summary>
        /// Called when we are done adding docs to this term. Records the freq/prox start
        /// pointers in the term state, emits skip data once the term reaches
        /// <c>SkipMinimum</c> documents, then clears the per-term counters.
        /// </summary>
        /// <param name="_state">Term state to populate; cast to <c>StandardTermState</c>.</param>
        public override void FinishTerm(BlockTermState _state)
        {
            var standardState = (StandardTermState)_state;

            Debug.Assert(standardState.DocFreq > 0);
            // TODO: wasteful we are counting this (counting # docs
            // for this term) in two places?
            Debug.Assert(standardState.DocFreq == Df);

            standardState.FreqStart = FreqStart;
            standardState.ProxStart = ProxStart;

            // The skip offset is relative to FreqStart; -1 marks "no skip data".
            standardState.SkipOffset = Df >= SkipMinimum
                ? SkipListWriter.WriteSkip(FreqOut) - FreqStart
                : -1;

            // Ready for the next term.
            Df = 0;
            LastDocID = 0;
        }
        /// <summary>
        /// Decodes the postings metadata of one term from <paramref name="in"/> into the
        /// supplied term state. Offsets are stored as deltas; when
        /// <paramref name="absolute"/> is true the running freq/prox offsets are first
        /// reset to zero so the values read become absolute.
        /// </summary>
        /// <param name="longs">Not read by this implementation.</param>
        /// <param name="in">Input positioned at the term's encoded metadata.</param>
        /// <param name="fieldInfo">Field the term belongs to; its index options decide whether a prox offset is read.</param>
        /// <param name="_termState">Term state to update; cast to <c>StandardTermState</c>.</param>
        /// <param name="absolute">True when the encoded values are not relative to a previous term.</param>
        public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState _termState, bool absolute)
        {
            StandardTermState termState = (StandardTermState)_termState;

            if (absolute)
            {
                termState.FreqOffset = 0;
                termState.ProxOffset = 0;
            }

            termState.FreqOffset += @in.ReadVLong();
            Debug.Assert(termState.FreqOffset < FreqIn.Length());

            // A skip offset is only present for terms with at least SkipMinimum docs;
            // otherwise SkipOffset is deliberately left untouched (its value is undefined).
            if (termState.DocFreq >= SkipMinimum)
            {
                termState.SkipOffset = @in.ReadVLong();
                Debug.Assert(termState.FreqOffset + termState.SkipOffset < FreqIn.Length());
            }

            if (fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                termState.ProxOffset += @in.ReadVLong();
            }
        }
        /// <summary>
        /// Encodes one term's metadata. File-pointer deltas against the previously encoded
        /// state go into <paramref name="longs"/>; the optional singleton doc id, last
        /// position block offset and skip offset are written to <paramref name="out"/>
        /// only when they are not -1.
        /// </summary>
        /// <param name="longs">Receives the doc/pos/pay start-pointer deltas (slots 0..2).</param>
        /// <param name="out">Receives the optional variable-length values.</param>
        /// <param name="fieldInfo">Not read by this implementation.</param>
        /// <param name="state">Term state to encode; cast to <c>Int32BlockTermState</c>.</param>
        /// <param name="absolute">When true, deltas are taken against <c>emptyState</c>.</param>
        public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute)
        {
            Int32BlockTermState current = (Int32BlockTermState)state;

            if (absolute)
            {
                // Restart delta encoding from the empty baseline.
                lastState = emptyState;
            }

            longs[0] = current.docStartFP - lastState.docStartFP;
            if (fieldHasPositions)
            {
                longs[1] = current.posStartFP - lastState.posStartFP;
                if (fieldHasPayloads || fieldHasOffsets)
                {
                    longs[2] = current.payStartFP - lastState.payStartFP;
                }
            }

            if (current.singletonDocID != -1)
            {
                @out.WriteVInt32(current.singletonDocID);
            }

            if (fieldHasPositions && current.lastPosBlockOffset != -1)
            {
                @out.WriteVInt64(current.lastPosBlockOffset);
            }

            if (current.skipOffset != -1)
            {
                @out.WriteVInt64(current.skipOffset);
            }

            // The next term is delta-encoded against this one.
            lastState = current;
        }
Esempio n. 8
0
                /// <summary>
                /// Creates a terms enumerator for one field: clones the reader's input and seeks
                /// it to the field's terms start pointer, obtains the field's index enum and a
                /// fresh term state, and allocates the scratch buffers.
                /// </summary>
                /// <param name="fieldReader">Per-field reader supplying field info, start pointer and longs size.</param>
                /// <param name="blockTermsReader">Owning reader supplying the input, index reader and postings reader.</param>
                public SegmentTermsEnum(FieldReader fieldReader, BlockTermsReader blockTermsReader)
                {
                    _fieldReader = fieldReader;
                    _blockTermsReader = blockTermsReader;

                    // Private clone of the terms input, positioned at this field's terms.
                    _input = (IndexInput) blockTermsReader._input.Clone();
                    _input.Seek(fieldReader._termsStartPointer);

                    var indexReader = blockTermsReader._indexReader;
                    _indexEnum = indexReader.GetFieldEnum(fieldReader._fieldInfo);
                    _doOrd = indexReader.SupportsOrd;

                    _fieldTerm.Field = fieldReader._fieldInfo.Name;

                    // Fresh term state with total term freq and ord both set to -1.
                    _state = blockTermsReader._postingsReader.NewTermState();
                    _state.TotalTermFreq = -1;
                    _state.Ord = -1;

                    // Scratch buffers.
                    _termSuffixes = new byte[128];
                    _docFreqBytes = new byte[64];
                    _longs = new long[fieldReader._longsSize];
                }
Esempio n. 9
0
        /// <summary>
        /// Returns a <c>SepDocsEnum</c> initialised for the given term. The
        /// <paramref name="reuse"/> instance is recycled only when it is a
        /// <c>SepDocsEnum</c> whose <c>START_DOC_IN</c> is this reader's <c>_docIn</c>.
        /// </summary>
        /// <param name="fieldInfo">Field the term belongs to.</param>
        /// <param name="bTermState">Term state; cast to <c>SepTermState</c>.</param>
        /// <param name="liveDocs">Live-docs bits passed through to the enum.</param>
        /// <param name="reuse">Candidate enum to recycle; may be null or of another type.</param>
        /// <param name="flags">Not consulted by this implementation.</param>
        public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsEnum reuse,
            int flags)
        {
            var sepState = (SepTermState)bTermState;

            // A reused enum could have come from another reader that also uses the
            // sep codec (e.g. through a parallel reader), so it is only recycled
            // when it was opened on this reader's doc input.
            var candidate = reuse as SepDocsEnum;
            if (candidate == null || candidate.START_DOC_IN != _docIn)
            {
                candidate = new SepDocsEnum(this);
            }

            return candidate.Init(fieldInfo, sepState, liveDocs);
        }
Esempio n. 10
0
        /// <summary>
        /// Called when we are done adding docs to this term. Flushes whatever is still
        /// buffered for the term (doc deltas/freqs, then positions with optional payloads
        /// and offsets) as vInts, optionally writes skip data, stores the resulting file
        /// pointers and offsets in the term state, and resets the per-term buffers.
        /// </summary>
        /// <param name="_state">Term state to populate; cast to <c>IntBlockTermState</c>.</param>
        public override void FinishTerm(BlockTermState _state)
        {
            IntBlockTermState state = (IntBlockTermState)_state;
            Debug.Assert(state.DocFreq > 0);

            // TODO: wasteful we are counting this (counting # docs
            // for this term) in two places?
            Debug.Assert(state.DocFreq == DocCount, state.DocFreq + " vs " + DocCount);

            // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
            int singletonDocID;
            if (state.DocFreq == 1)
            {
                // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
                singletonDocID = DocDeltaBuffer[0];
            }
            else
            {
                singletonDocID = -1;
                // vInt encode the remaining doc deltas and freqs:
                for (int i = 0; i < DocBufferUpto; i++)
                {
                    int docDelta = DocDeltaBuffer[i];
                    int freq = FreqBuffer[i];
                    if (!FieldHasFreqs)
                    {
                        // No freqs indexed: the delta is written as-is.
                        DocOut.WriteVInt(docDelta);
                    }
                    else if (FreqBuffer[i] == 1)
                    {
                        // Low bit set: freq is 1 and is not written separately.
                        DocOut.WriteVInt((docDelta << 1) | 1);
                    }
                    else
                    {
                        // Low bit clear: the freq follows as its own vInt.
                        DocOut.WriteVInt(docDelta << 1);
                        DocOut.WriteVInt(freq);
                    }
                }
            }

            long lastPosBlockOffset;

            if (FieldHasPositions)
            {
                // totalTermFreq is just total number of positions(or payloads, or offsets)
                // associated with current term.
                Debug.Assert(state.TotalTermFreq != -1);
                if (state.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
                {
                    // record file offset for last pos in last block
                    lastPosBlockOffset = PosOut.FilePointer - PosStartFP;
                }
                else
                {
                    lastPosBlockOffset = -1;
                }
                if (PosBufferUpto > 0)
                {
                    // TODO: should we send offsets/payloads to
                    // .pay...?  seems wasteful (have to store extra
                    // vLong for low (< BLOCK_SIZE) DF terms = vast vast
                    // majority)

                    // vInt encode the remaining positions/payloads/offsets:
                    int lastPayloadLength = -1; // force first payload length to be written
                    int lastOffsetLength = -1; // force first offset length to be written
                    int payloadBytesReadUpto = 0;
                    for (int i = 0; i < PosBufferUpto; i++)
                    {
                        int posDelta = PosDeltaBuffer[i];
                        if (FieldHasPayloads)
                        {
                            int payloadLength = PayloadLengthBuffer[i];
                            if (payloadLength != lastPayloadLength)
                            {
                                // Low bit set: the payload length changed and follows as a vInt.
                                lastPayloadLength = payloadLength;
                                PosOut.WriteVInt((posDelta << 1) | 1);
                                PosOut.WriteVInt(payloadLength);
                            }
                            else
                            {
                                // Low bit clear: same payload length as the previous position.
                                PosOut.WriteVInt(posDelta << 1);
                            }

                            if (payloadLength != 0)
                            {
                                // Payload bytes are consumed sequentially from PayloadBytes.
                                PosOut.WriteBytes(PayloadBytes, payloadBytesReadUpto, payloadLength);
                                payloadBytesReadUpto += payloadLength;
                            }
                        }
                        else
                        {
                            PosOut.WriteVInt(posDelta);
                        }

                        if (FieldHasOffsets)
                        {
                            int delta = OffsetStartDeltaBuffer[i];
                            int length = OffsetLengthBuffer[i];
                            if (length == lastOffsetLength)
                            {
                                // Low bit clear: offset length unchanged, only the start delta is written.
                                PosOut.WriteVInt(delta << 1);
                            }
                            else
                            {
                                // Low bit set: the new offset length follows as a vInt.
                                PosOut.WriteVInt(delta << 1 | 1);
                                PosOut.WriteVInt(length);
                                lastOffsetLength = length;
                            }
                        }
                    }

                    if (FieldHasPayloads)
                    {
                        // Every buffered payload byte must have been written out.
                        Debug.Assert(payloadBytesReadUpto == PayloadByteUpto);
                        PayloadByteUpto = 0;
                    }
                }
            }
            else
            {
                lastPosBlockOffset = -1;
            }

            long skipOffset;
            if (DocCount > Lucene41PostingsFormat.BLOCK_SIZE)
            {
                // Skip data is written to DocOut; the offset is stored relative to DocStartFP.
                skipOffset = SkipWriter.WriteSkip(DocOut) - DocStartFP;
            }
            else
            {
                // No skip data for terms that fit in a single block.
                skipOffset = -1;
            }

            // Publish the term's pointers/offsets to the term state.
            state.DocStartFP = DocStartFP;
            state.PosStartFP = PosStartFP;
            state.PayStartFP = PayStartFP;
            state.SingletonDocID = singletonDocID;
            state.SkipOffset = skipOffset;
            state.LastPosBlockOffset = lastPosBlockOffset;

            // Reset per-term buffers and counters for the next term.
            DocBufferUpto = 0;
            PosBufferUpto = 0;
            LastDocID = 0;
            DocCount = 0;
        }
Esempio n. 11
0
            /// <summary>
            /// Writes all pending terms as one block: a header (term count and common
            /// prefix length) followed by three length-prefixed blobs holding the term
            /// suffixes, the doc/term frequencies, and the postings metadata. Afterwards
            /// the last written term is remembered and the pending count is cleared.
            /// </summary>
            private void FlushBlock()
            {
                // Pass 1: the prefix shared by every pending term and the term that
                // preceded this block.
                int commonPrefix = SharedPrefix(_lastPrevTerm, _pendingTerms[0].Term);
                for (int t = 1; t < _pendingCount; t++)
                {
                    var shared = SharedPrefix(_lastPrevTerm, _pendingTerms[t].Term);
                    commonPrefix = Math.Min(commonPrefix, shared);
                }

                var output = _btw._output;
                output.WriteVInt(_pendingCount);
                output.WriteVInt(commonPrefix);

                // Pass 2: suffixes (length + bytes past the common prefix), as one blob.
                // TODO: cutover to better intblock codec, instead
                // of interleaving here:
                for (int t = 0; t < _pendingCount; t++)
                {
                    var term = _pendingTerms[t].Term;
                    var suffixLength = term.Length - commonPrefix;
                    _bytesWriter.WriteVInt(suffixLength);
                    _bytesWriter.WriteBytes(term.Bytes, commonPrefix, suffixLength);
                }
                output.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(output);
                _bytesWriter.Reset();

                // Pass 3: docFreq, plus (totalTermFreq - docFreq) unless the field is DOCS_ONLY.
                // TODO: cutover to better intblock codec.  simple64?
                for (int t = 0; t < _pendingCount; t++)
                {
                    BlockTermState termState = _pendingTerms[t].State;

                    Debug.Assert(termState != null);

                    _bytesWriter.WriteVInt(termState.DocFreq);
                    if (_fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
                    {
                        _bytesWriter.WriteVLong(termState.TotalTermFreq - termState.DocFreq);
                    }
                }
                output.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(output);
                _bytesWriter.Reset();

                // Pass 4: postings metadata; only the first term of the block is
                // encoded as absolute.
                var longs = new long[_longsSize];
                for (int t = 0; t < _pendingCount; t++)
                {
                    _postingsWriter.EncodeTerm(longs, _bufferWriter, _fieldInfo, _pendingTerms[t].State, t == 0);
                    foreach (long value in longs)
                    {
                        _bytesWriter.WriteVLong(value);
                    }
                    _bufferWriter.WriteTo(_bytesWriter);
                    _bufferWriter.Reset();
                }
                output.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(output);
                _bytesWriter.Reset();

                // Remember the last term for the next block's prefix computation.
                _lastPrevTerm.CopyBytes(_pendingTerms[_pendingCount - 1].Term);
                _pendingCount = 0;
            }
Esempio n. 12
0
        /// <summary>
        /// Encodes one term's metadata to <paramref name="output"/>: the doc index, then
        /// (depending on the index options) the freq index, the position index and the
        /// payload file pointer, and finally the skip file pointer when the term has one.
        /// File pointers are written as deltas against the previous term unless
        /// <paramref name="absolute"/> is true.
        /// </summary>
        /// <param name="longs">Not used by this implementation.</param>
        /// <param name="output">Destination for the encoded metadata.</param>
        /// <param name="fi">Not used by this implementation.</param>
        /// <param name="bstate">Term state to encode; cast to <c>SepTermState</c>.</param>
        /// <param name="absolute">True to restart delta encoding from this term.</param>
        public override void EncodeTerm(long[] longs, DataOutput output, FieldInfo fi, BlockTermState bstate, bool absolute)
        {
            var sepState = (SepTermState)bstate;

            if (absolute)
            {
                // Restart the delta bases; this state becomes the tracked "last" state.
                LAST_SKIP_FP = 0;
                LAST_PAYLOAD_FP = 0;
                _lastState = sepState;
            }

            _lastState.DocIndex.CopyFrom(sepState.DocIndex, false);
            _lastState.DocIndex.Write(output, absolute);

            bool hasFreqs = INDEX_OPTIONS != FieldInfo.IndexOptions.DOCS_ONLY;
            if (hasFreqs)
            {
                _lastState.FreqIndex.CopyFrom(sepState.FreqIndex, false);
                _lastState.FreqIndex.Write(output, absolute);
            }

            if (hasFreqs && INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                _lastState.PosIndex.CopyFrom(sepState.PosIndex, false);
                _lastState.PosIndex.Write(output, absolute);

                if (STORE_PAYLOADS)
                {
                    var payloadFp = sepState.PayloadFp;
                    output.WriteVLong(absolute ? payloadFp : payloadFp - LAST_PAYLOAD_FP);
                    LAST_PAYLOAD_FP = payloadFp;
                }
            }

            // A skip pointer of -1 means the term has no skip data; nothing is written.
            if (sepState.SkipFp != -1)
            {
                var skipFp = sepState.SkipFp;
                output.WriteVLong(absolute ? skipFp : skipFp - LAST_SKIP_FP);
                LAST_SKIP_FP = skipFp;
            }
        }
Esempio n. 13
0
 /// <summary>
 /// Encodes one term. Terms whose postings were inlined (<c>BYTES</c> is set) are
 /// written as a length-prefixed byte blob; all others are delegated to the wrapped
 /// postings writer, whose longs and buffered bytes are copied to
 /// <paramref name="output"/>. An absolute request stays pending in <c>_absolute</c>
 /// until a term is actually passed to the wrapped writer.
 /// </summary>
 /// <param name="empty">Expected to be an empty array (asserted).</param>
 /// <param name="output">Destination for the encoded term.</param>
 /// <param name="fieldInfo">Forwarded to the wrapped writer.</param>
 /// <param name="state">Term state to encode; cast to <c>PulsingTermState</c>.</param>
 /// <param name="abs">True when the caller requests absolute encoding.</param>
 public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState state,
     bool abs)
 {
     var pulsingState = (PulsingTermState) state;
     Debug.Assert(empty.Length == 0);

     _absolute = _absolute || abs;

     if (pulsingState.BYTES != null)
     {
         // Inlined postings: write them straight into the terms dict.
         output.WriteVInt(pulsingState.BYTES.Length);
         output.WriteBytes(pulsingState.BYTES, 0, pulsingState.BYTES.Length);
         _absolute = _absolute || abs;
         return;
     }

     // Not inlined: let the wrapped writer encode, then copy its output across.
     _wrappedPostingsWriter.EncodeTerm(_longs, _buffer, fieldInfo, pulsingState.WRAPPED_STATE, _absolute);
     for (var slot = 0; slot < _longsSize; slot++)
     {
         output.WriteVLong(_longs[slot]);
     }
     _buffer.WriteTo(output);
     _buffer.Reset();

     // The pending absolute request has now been honoured.
     _absolute = false;
 }
Esempio n. 14
0
        /// <summary>
        /// Called when we are done adding docs to this term. When <c>_pendingCount</c> is
        /// -1 the term is finished through the wrapped postings writer; otherwise the
        /// pending entries are serialised into <c>_buffer</c> and stored on the state as
        /// <c>BYTES</c>, so the postings are inlined with the term.
        /// </summary>
        /// <param name="_state">Term state to populate; cast to <c>PulsingTermState</c>.</param>
        public override void FinishTerm(BlockTermState _state)
        {
            var state = (PulsingTermState) _state;

            Debug.Assert(_pendingCount > 0 || _pendingCount == -1);

            if (_pendingCount == -1)
            {
                // Not inlined: copy the stats to the wrapped state and let the
                // wrapped writer finish the term.
                state.WRAPPED_STATE.DocFreq = state.DocFreq;
                state.WRAPPED_STATE.TotalTermFreq = state.TotalTermFreq;
                state.BYTES = null;
                _wrappedPostingsWriter.FinishTerm(state.WRAPPED_STATE);
            }
            else
            {
                // There were few enough total occurrences for this
                // term, so we fully inline our postings data into
                // terms dict, now:

                // TODO: it'd be better to share this encoding logic
                // in some inner codec that knows how to write a
                // single doc / single position, etc.  This way if a
                // given codec wants to store other interesting
                // stuff, it could use this pulsing codec to do so

                if (_indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    var lastDocID = 0;
                    var pendingIDX = 0;
                    var lastPayloadLength = -1;
                    var lastOffsetLength = -1;
                    while (pendingIDX < _pendingCount)
                    {
                        // The first pending entry of a document carries its docID and termFreq.
                        var doc = _pending[pendingIDX];

                        var delta = doc.docID - lastDocID;
                        lastDocID = doc.docID;

                        // Low bit of the doc delta set means termFreq == 1 (not written separately).
                        if (doc.termFreq == 1)
                        {
                            _buffer.WriteVInt((delta << 1) | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt(delta << 1);
                            _buffer.WriteVInt(doc.termFreq);
                        }

                        var lastPos = 0;
                        var lastOffset = 0;
                        for (var posIDX = 0; posIDX < doc.termFreq; posIDX++)
                        {
                            // Consumes one pending entry per position of this document.
                            var pos = _pending[pendingIDX++];
                            Debug.Assert(pos.docID == doc.docID);
                            var posDelta = pos.pos - lastPos;
                            lastPos = pos.pos;

                            var payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                            if (_storePayloads)
                            {
                                // Low bit of the position delta set means the payload
                                // length changed and follows as a vInt.
                                if (payloadLength != lastPayloadLength)
                                {
                                    _buffer.WriteVInt((posDelta << 1) | 1);
                                    _buffer.WriteVInt(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                {
                                    _buffer.WriteVInt(posDelta << 1);
                                }
                            }
                            else
                            {
                                _buffer.WriteVInt(posDelta);
                            }

                            if (_indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                            {
                                // Start offset is delta-coded against the previous start offset;
                                // low bit set means the offset length changed and follows.
                                var offsetDelta = pos.startOffset - lastOffset;
                                var offsetLength = pos.endOffset - pos.startOffset;
                                if (offsetLength != lastOffsetLength)
                                {
                                    _buffer.WriteVInt(offsetDelta << 1 | 1);
                                    _buffer.WriteVInt(offsetLength);
                                }
                                else
                                {
                                    _buffer.WriteVInt(offsetDelta << 1);
                                }
                                lastOffset = pos.startOffset;
                                lastOffsetLength = offsetLength;
                            }

                            // Payload bytes are written after the position (and offset) data.
                            if (payloadLength > 0)
                            {
                                Debug.Assert(_storePayloads);
                                _buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                            }
                        }
                    }
                }
                else switch (_indexOptions)
                {
                    case FieldInfo.IndexOptions.DOCS_AND_FREQS:
                    {
                        // One pending entry per document: doc delta with the freq==1 flag
                        // in the low bit, followed by the freq when it is not 1.
                        var lastDocId = 0;
                        for (var posIdx = 0; posIdx < _pendingCount; posIdx++)
                        {
                            var doc = _pending[posIdx];
                            var delta = doc.docID - lastDocId;

                            Debug.Assert(doc.termFreq != 0);

                            if (doc.termFreq == 1)
                            {
                                _buffer.WriteVInt((delta << 1) | 1);
                            }
                            else
                            {
                                _buffer.WriteVInt(delta << 1);
                                _buffer.WriteVInt(doc.termFreq);
                            }
                            lastDocId = doc.docID;
                        }
                    }
                        break;
                    case FieldInfo.IndexOptions.DOCS_ONLY:
                    {
                        // Only plain doc deltas are written.
                        var lastDocId = 0;
                        for (var posIdx = 0; posIdx < _pendingCount; posIdx++)
                        {
                            var doc = _pending[posIdx];
                            _buffer.WriteVInt(doc.docID - lastDocId);
                            lastDocId = doc.docID;
                        }
                    }
                        break;
                }

                // Snapshot the serialised postings onto the term state and clear the buffer.
                state.BYTES = new byte[(int) _buffer.FilePointer];
                _buffer.WriteTo(state.BYTES, 0);
                _buffer.Reset();
            }
            _pendingCount = 0;
        }
Esempio n. 15
0
        /// <summary>
        /// Called when we are done adding docs to this term. Snapshots the doc, freq and
        /// position indexes into the term state (freq/pos only when the index options
        /// include them, otherwise null), writes skip data once the term has at least
        /// <c>SKIP_MINIMUM</c> documents, records the payload start and resets the
        /// per-term counters.
        /// </summary>
        /// <param name="bstate">Term state to populate; cast to <c>SepTermState</c>.</param>
        public override void FinishTerm(BlockTermState bstate)
        {
            var sepState = (SepTermState)bstate;

            // TODO: -- wasteful we are counting this in two places?
            Debug.Assert(sepState.DocFreq > 0);
            Debug.Assert(sepState.DocFreq == DF);

            sepState.DocIndex = DOC_OUT.Index();
            sepState.DocIndex.CopyFrom(DOC_INDEX, false);

            bool hasFreqs = INDEX_OPTIONS != FieldInfo.IndexOptions.DOCS_ONLY;
            if (hasFreqs)
            {
                sepState.FreqIndex = FREQ_OUT.Index();
                sepState.FreqIndex.CopyFrom(FREQ_INDEX, false);
            }
            else
            {
                sepState.FreqIndex = null;
            }

            if (hasFreqs && INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                sepState.PosIndex = POS_OUT.Index();
                sepState.PosIndex.CopyFrom(POS_INDEX, false);
            }
            else
            {
                sepState.PosIndex = null;
            }

            if (DF < SKIP_MINIMUM)
            {
                // Too few docs for skip data.
                sepState.SkipFp = -1;
            }
            else
            {
                // Capture the pointer before the skip data is written at it.
                sepState.SkipFp = SKIP_OUT.FilePointer;
                SKIP_LIST_WRITER.WriteSkip(SKIP_OUT);
            }

            sepState.PayloadFp = PAYLOAD_START;

            // Reset per-term counters.
            LAST_DOC_ID = 0;
            DF = 0;
        }
Esempio n. 16
0
        /// <summary>
        /// Encodes one term's metadata to <paramref name="output"/>. The doc index is
        /// always written; the freq index, position index and payload file pointer are
        /// written according to the index options and payload setting; the skip file
        /// pointer is written only when it is not -1. File pointers are delta-coded
        /// against the previous term unless <paramref name="absolute"/> is true.
        /// </summary>
        /// <param name="longs">Not used by this implementation.</param>
        /// <param name="output">Destination for the encoded metadata.</param>
        /// <param name="fi">Not used by this implementation.</param>
        /// <param name="bstate">Term state to encode; cast to <c>SepTermState</c>.</param>
        /// <param name="absolute">True to restart delta encoding from this term.</param>
        public override void EncodeTerm(long[] longs, DataOutput output, FieldInfo fi, BlockTermState bstate, bool absolute)
        {
            var current = (SepTermState) bstate;

            if (absolute)
            {
                // Delta bases start over and this state becomes the tracked one.
                LAST_SKIP_FP = 0;
                LAST_PAYLOAD_FP = 0;
                _lastState = current;
            }

            _lastState.DocIndex.CopyFrom(current.DocIndex, false);
            _lastState.DocIndex.Write(output, absolute);

            bool writesFreqs = INDEX_OPTIONS != FieldInfo.IndexOptions.DOCS_ONLY;
            if (writesFreqs)
            {
                _lastState.FreqIndex.CopyFrom(current.FreqIndex, false);
                _lastState.FreqIndex.Write(output, absolute);
            }

            if (writesFreqs && INDEX_OPTIONS == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                _lastState.PosIndex.CopyFrom(current.PosIndex, false);
                _lastState.PosIndex.Write(output, absolute);

                if (STORE_PAYLOADS)
                {
                    var payloadPointer = current.PayloadFp;
                    output.WriteVLong(absolute ? payloadPointer : payloadPointer - LAST_PAYLOAD_FP);
                    LAST_PAYLOAD_FP = payloadPointer;
                }
            }

            // -1 marks a term without skip data: nothing more to write.
            if (current.SkipFp != -1)
            {
                var skipPointer = current.SkipFp;
                output.WriteVLong(absolute ? skipPointer : skipPointer - LAST_SKIP_FP);
                LAST_SKIP_FP = skipPointer;
            }
        }
        // TODO: specialize to liveDocs vs not
        /// <summary>
        /// Returns a positions enum for the given term, recycling <paramref name="reuse"/> when it
        /// is of the matching enum type and reports that it can be reused for this reader and field.
        /// </summary>
        /// <param name="fieldInfo">Field whose postings are being read.</param>
        /// <param name="termState">Term state; cast to <c>Lucene41PostingsWriter.IntBlockTermState</c>.</param>
        /// <param name="liveDocs">Passed through to the enum's <c>Reset</c>.</param>
        /// <param name="reuse">Optional enum to recycle; may be of an unrelated type.</param>
        /// <param name="flags">Bit set tested against <c>FLAG_OFFSETS</c> and <c>FLAG_PAYLOADS</c>.</param>
        /// <returns>The enum returned by the chosen implementation's <c>Reset</c>.</returns>
        public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
        {
            bool indexHasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            bool indexHasPayloads = fieldInfo.HasPayloads();

            // Offsets/payloads only matter when the index has them AND the caller asked for them.
            bool wantsOffsets = indexHasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0;
            bool wantsPayloads = indexHasPayloads && (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0;

            if (wantsOffsets || wantsPayloads)
            {
                var everything = reuse as EverythingEnum;
                if (everything == null || !everything.CanReuse(DocIn, fieldInfo))
                {
                    everything = new EverythingEnum(this, fieldInfo);
                }
                return everything.Reset(liveDocs, (Lucene41PostingsWriter.IntBlockTermState)termState, flags);
            }

            // Neither offsets nor payloads are needed: use the positions-only enum.
            var positionsOnly = reuse as BlockDocsAndPositionsEnum;
            if (positionsOnly == null || !positionsOnly.CanReuse(DocIn, fieldInfo))
            {
                positionsOnly = new BlockDocsAndPositionsEnum(this, fieldInfo);
            }
            return positionsOnly.Reset(liveDocs, (Lucene41PostingsWriter.IntBlockTermState)termState);
        }
Esempio n. 18
0
        /// <summary>
        /// Writes one term's metadata to <paramref name="output"/>: doc index first, then the
        /// freq index, pos index and payload file pointer as the index options require, and
        /// last the skip file pointer unless it is -1.
        /// </summary>
        /// <param name="longs">Unused by this implementation.</param>
        /// <param name="output">Destination for the encoded metadata.</param>
        /// <param name="fi">Unused by this implementation.</param>
        /// <param name="state">Term state to encode; cast to <c>SepTermState</c>.</param>
        /// <param name="absolute">
        /// <c>true</c> to reset the delta bases and write file pointers as-is; <c>false</c> to
        /// write them relative to the previously encoded term.
        /// </param>
        public override void EncodeTerm(long[] longs, DataOutput output, FieldInfo fi, BlockTermState state, bool absolute)
        {
            var sepState = (SepTermState)state;

            if (absolute)
            {
                // Start of a new run: zero the bases and adopt this term as the reference state.
                lastSkipFP    = 0;
                lastPayloadFP = 0;
                lastState     = sepState;
            }

            lastState.DocIndex.CopyFrom(sepState.DocIndex, false);
            lastState.DocIndex.Write(output, absolute);

            bool hasFreqs = indexOptions != IndexOptions.DOCS_ONLY;
            if (hasFreqs)
            {
                lastState.FreqIndex.CopyFrom(sepState.FreqIndex, false);
                lastState.FreqIndex.Write(output, absolute);
            }

            if (hasFreqs && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                lastState.PosIndex.CopyFrom(sepState.PosIndex, false);
                lastState.PosIndex.Write(output, absolute);

                if (storePayloads)
                {
                    var payloadFP = sepState.PayloadFP;
                    output.WriteVInt64(absolute ? payloadFP : payloadFP - lastPayloadFP);
                    lastPayloadFP = payloadFP;
                }
            }

            // Nothing more to write when the term has no skip pointer.
            if (sepState.SkipFP == -1)
            {
                return;
            }

            var skipFP = sepState.SkipFP;
            output.WriteVInt64(absolute ? skipFP : skipFP - lastSkipFP);
            lastSkipFP = skipFP;
        }
Esempio n. 19
0
        /// <summary>
        /// Called when we are done adding docs to this term.
        /// <para>
        /// When <c>pendingCount</c> is -1 the term is handed to the wrapped postings writer and
        /// <c>state.bytes</c> is cleared. Otherwise the buffered <c>pending</c> entries are
        /// serialized into <c>buffer</c> and copied into <c>state.bytes</c>, so the postings
        /// are inlined into the terms dict.
        /// </para>
        /// </summary>
        /// <param name="_state">Term state being finished; cast to <c>PulsingTermState</c>.</param>
        public override void FinishTerm(BlockTermState _state)
        {
            PulsingTermState state = (PulsingTermState) _state;

            Debug.Assert(pendingCount > 0 || pendingCount == -1);

            if (pendingCount == -1)
            {
                state.wrappedState.DocFreq = state.DocFreq;
                state.wrappedState.TotalTermFreq = state.TotalTermFreq;
                state.bytes = null;
                _wrappedPostingsWriter.FinishTerm(state.wrappedState);
            }
            else
            {
                // There were few enough total occurrences for this
                // term, so we fully inline our postings data into
                // terms dict, now:

                // TODO: it'd be better to share this encoding logic
                // in some inner codec that knows how to write a
                // single doc / single position, etc.  This way if a
                // given codec wants to store other interesting
                // stuff, it could use this pulsing codec to do so

                if (indexOptions.Value.CompareTo(FieldInfo.IndexOptions_e.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
                {
                    int lastDocID = 0;
                    int pendingIDX = 0;
                    int lastPayloadLength = -1;
                    int lastOffsetLength = -1;
                    while (pendingIDX < pendingCount)
                    {
                        // First entry of the next document; its termFreq tells how many
                        // consecutive pending entries belong to that document.
                        Position doc = pending[pendingIDX];

                        int delta = doc.docID - lastDocID;
                        lastDocID = doc.docID;

                        // if (DEBUG) System.out.println("  write doc=" + doc.docID + " freq=" + doc.termFreq);

                        // Doc delta is shifted left by one; the low bit set means termFreq == 1,
                        // otherwise the termFreq follows as a separate vInt.
                        if (doc.termFreq == 1)
                        {
                            buffer.WriteVInt((delta << 1) | 1);
                        }
                        else
                        {
                            buffer.WriteVInt(delta << 1);
                            buffer.WriteVInt(doc.termFreq);
                        }

                        int lastPos = 0;
                        int lastOffset = 0;
                        for (int posIDX = 0; posIDX < doc.termFreq; posIDX++)
                        {
                            Position pos = pending[pendingIDX++];
                            Debug.Assert(pos.docID == doc.docID);
                            int posDelta = pos.pos - lastPos;
                            lastPos = pos.pos;

                            int payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                            if (storePayloads)
                            {
                                // Low bit of the position delta flags a change in payload length,
                                // in which case the new length follows.
                                if (payloadLength != lastPayloadLength)
                                {
                                    buffer.WriteVInt((posDelta << 1) | 1);
                                    buffer.WriteVInt(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                {
                                    buffer.WriteVInt(posDelta << 1);
                                }
                            }
                            else
                            {
                                buffer.WriteVInt(posDelta);
                            }

                            if (indexOptions.Value.CompareTo(FieldInfo.IndexOptions_e.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
                            {
                                //System.out.println("write=" + pos.startOffset + "," + pos.endOffset);
                                int offsetDelta = pos.startOffset - lastOffset;
                                int offsetLength = pos.endOffset - pos.startOffset;
                                // Same scheme as payloads: low bit flags a changed offset length.
                                if (offsetLength != lastOffsetLength)
                                {
                                    buffer.WriteVInt(offsetDelta << 1 | 1);
                                    buffer.WriteVInt(offsetLength);
                                }
                                else
                                {
                                    buffer.WriteVInt(offsetDelta << 1);
                                }
                                lastOffset = pos.startOffset;
                                lastOffsetLength = offsetLength;
                            }

                            if (payloadLength > 0)
                            {
                                Debug.Assert(storePayloads);
                                buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                            }
                        }
                    }
                }
                else if (indexOptions == FieldInfo.IndexOptions_e.DOCS_AND_FREQS)
                {
                    // One pending entry per document: doc delta plus termFreq, no positions.
                    int lastDocID = 0;
                    for (int posIDX = 0; posIDX < pendingCount; posIDX++)
                    {
                        Position doc = pending[posIDX];
                        int delta = doc.docID - lastDocID;
                        Debug.Assert(doc.termFreq != 0);

                        if (doc.termFreq == 1)
                        {
                            buffer.WriteVInt((delta << 1) | 1);
                        }
                        else
                        {
                            buffer.WriteVInt(delta << 1);
                            buffer.WriteVInt(doc.termFreq);
                        }
                        lastDocID = doc.docID;
                    }
                }
                else if (indexOptions == FieldInfo.IndexOptions_e.DOCS_ONLY)
                {
                    // Only the doc ID deltas are written.
                    int lastDocID = 0;
                    for (int posIDX = 0; posIDX < pendingCount; posIDX++)
                    {
                        Position doc = pending[posIDX];
                        buffer.WriteVInt(doc.docID - lastDocID);
                        lastDocID = doc.docID;
                    }
                }

                // Snapshot everything written to the buffer into the term state, then
                // clear the buffer for the next term.
                state.bytes = new byte[(int) buffer.FilePointer];
                buffer.WriteTo((sbyte[])(Array)state.bytes, 0);
                buffer.Reset();
            }
            pendingCount = 0;
        }
Esempio n. 20
0
        /// <summary>
        /// Called when we are done adding docs to this term.
        /// <para>
        /// If <c>_pendingCount</c> is -1 the term is forwarded to the wrapped postings writer and
        /// <c>bytes</c> on the term state is cleared; otherwise the buffered <c>_pending</c>
        /// entries are serialized through <c>_buffer</c> and stored in <c>bytes</c> so the
        /// postings are inlined into the terms dict.
        /// </para>
        /// </summary>
        /// <param name="state">Term state being finished; cast to <c>PulsingTermState</c>.</param>
        public override void FinishTerm(BlockTermState state)
        {
            var pulsingState = (PulsingTermState)state;

            Debug.Assert(_pendingCount == -1 || _pendingCount > 0);

            if (_pendingCount != -1)
            {
                // Few enough occurrences were buffered for this term, so its postings
                // are encoded right here and inlined into the terms dict.

                // TODO: it'd be better to share this encoding logic
                // in some inner codec that knows how to write a
                // single doc / single position, etc.  This way if a
                // given codec wants to store other interesting
                // stuff, it could use this pulsing codec to do so

                if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
                {
                    bool writeOffsets = _indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
                    int prevDocId = 0;
                    int prevPayloadLength = -1;
                    int prevOffsetLength = -1;
                    int cursor = 0;

                    while (cursor < _pendingCount)
                    {
                        // First entry of the next document; its termFreq says how many
                        // consecutive entries belong to it.
                        var head = _pending[cursor];

                        int docCode = (head.docID - prevDocId) << 1;
                        prevDocId = head.docID;

                        // Low bit set means termFreq == 1; otherwise the freq follows.
                        if (head.termFreq == 1)
                        {
                            _buffer.WriteVInt32(docCode | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt32(docCode);
                            _buffer.WriteVInt32(head.termFreq);
                        }

                        int prevPos = 0;
                        int prevStartOffset = 0;
                        for (int n = 0; n < head.termFreq; n++)
                        {
                            var entry = _pending[cursor];
                            cursor++;
                            Debug.Assert(entry.docID == head.docID);

                            int posDelta = entry.pos - prevPos;
                            prevPos = entry.pos;

                            int payloadLength = entry.payload != null ? entry.payload.Length : 0;
                            if (!_storePayloads)
                            {
                                _buffer.WriteVInt32(posDelta);
                            }
                            else if (payloadLength == prevPayloadLength)
                            {
                                _buffer.WriteVInt32(posDelta << 1);
                            }
                            else
                            {
                                // Low bit flags a changed payload length, which follows.
                                _buffer.WriteVInt32((posDelta << 1) | 1);
                                _buffer.WriteVInt32(payloadLength);
                                prevPayloadLength = payloadLength;
                            }

                            if (writeOffsets)
                            {
                                int offsetCode = (entry.startOffset - prevStartOffset) << 1;
                                int offsetLength = entry.endOffset - entry.startOffset;
                                if (offsetLength == prevOffsetLength)
                                {
                                    _buffer.WriteVInt32(offsetCode);
                                }
                                else
                                {
                                    // Low bit flags a changed offset length, which follows.
                                    _buffer.WriteVInt32(offsetCode | 1);
                                    _buffer.WriteVInt32(offsetLength);
                                }
                                prevStartOffset = entry.startOffset;
                                prevOffsetLength = offsetLength;
                            }

                            if (payloadLength > 0)
                            {
                                Debug.Assert(_storePayloads);
                                _buffer.WriteBytes(entry.payload.Bytes, 0, entry.payload.Length);
                            }
                        }
                    }
                }
                else if (_indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    // One entry per document: doc delta plus freq, no positions.
                    int prevDocId = 0;
                    for (int i = 0; i < _pendingCount; i++)
                    {
                        Position entry = _pending[i];
                        int docCode = (entry.docID - prevDocId) << 1;

                        Debug.Assert(entry.termFreq != 0);

                        if (entry.termFreq == 1)
                        {
                            _buffer.WriteVInt32(docCode | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt32(docCode);
                            _buffer.WriteVInt32(entry.termFreq);
                        }
                        prevDocId = entry.docID;
                    }
                }
                else if (_indexOptions == IndexOptions.DOCS_ONLY)
                {
                    // Only doc ID deltas are written.
                    int prevDocId = 0;
                    for (int i = 0; i < _pendingCount; i++)
                    {
                        Position entry = _pending[i];
                        _buffer.WriteVInt32(entry.docID - prevDocId);
                        prevDocId = entry.docID;
                    }
                }

                // Copy what was written into the term state and clear the buffer for the next term.
                pulsingState.bytes = new byte[(int)_buffer.GetFilePointer()];
                _buffer.WriteTo(pulsingState.bytes, 0);
                _buffer.Reset();
            }
            else
            {
                // Not inlined: let the wrapped writer finish the term.
                pulsingState.wrappedState.DocFreq       = pulsingState.DocFreq;
                pulsingState.wrappedState.TotalTermFreq = pulsingState.TotalTermFreq;
                pulsingState.bytes = null;
                _wrappedPostingsWriter.FinishTerm(pulsingState.wrappedState);
            }

            _pendingCount = 0;
        }
Esempio n. 21
0
 /// <summary>
 /// Writes one term's metadata to <paramref name="output"/>. A term without inlined bytes
 /// is encoded by the wrapped postings writer (its longs followed by its buffered bytes);
 /// a term with inlined bytes is written as a length-prefixed byte blob.
 /// </summary>
 /// <param name="empty">Asserted to have length 0; not otherwise used.</param>
 /// <param name="output">Destination for the encoded metadata.</param>
 /// <param name="fieldInfo">Passed through to the wrapped postings writer.</param>
 /// <param name="_state">Term state to encode; cast to <c>PulsingTermState</c>.</param>
 /// <param name="absolute">
 /// OR-ed into the writer's own <c>absolute</c> flag, which stays set until a term is
 /// actually encoded by the wrapped postings writer.
 /// </param>
 public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState _state,
     bool absolute)
 {
     PulsingTermState state = (PulsingTermState) _state;
     Debug.Assert(empty.Length == 0);
     this.absolute = this.absolute || absolute;
     if (state.bytes == null)
     {
         // Not inlined: delegate, then emit the wrapped writer's longs and buffered bytes.
         _wrappedPostingsWriter.EncodeTerm(longs, buffer, fieldInfo, state.wrappedState, this.absolute);
         for (int i = 0; i < longsSize; i++)
         {
             output.WriteVLong(longs[i]);
         }
         buffer.WriteTo(output);
         buffer.Reset();
         // The wrapped writer has now seen the pending absolute request.
         this.absolute = false;
     }
     else
     {
         // Inlined: length-prefixed blob. The pending absolute request is kept
         // for the next term that reaches the wrapped writer.
         output.WriteVInt(state.bytes.Length);
         output.WriteBytes(state.bytes, 0, state.bytes.Length);
         this.absolute = this.absolute || absolute;
     }
 }
        /// <summary>
        /// Reads one term's metadata from <paramref name="input"/>. Terms whose count is at most
        /// <c>_maxPositions</c> are read as an inlined byte blob; all others are decoded by the
        /// wrapped postings reader.
        /// </summary>
        /// <param name="empty">Asserted to have length 0; not otherwise used.</param>
        /// <param name="input">Source of the encoded metadata.</param>
        /// <param name="fieldInfo">Supplies the index options and the field number.</param>
        /// <param name="_termState">Term state to fill in; cast to <c>PulsingTermState</c>.</param>
        /// <param name="absolute">
        /// OR-ed into the term state's <c>Absolute</c> flag, which stays set until a term is
        /// decoded by the wrapped postings reader.
        /// </param>
        public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState _termState,
                                        bool absolute)
        {
            var pulsingState = (PulsingTermState)_termState;

            Debug.Assert(empty.Length == 0);

            pulsingState.Absolute |= absolute;

            // if we have positions, its total TF, otherwise its computed based on docFreq.
            // TODO Double check this is right..
            bool fieldHasPositions =
                FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.CompareTo(fieldInfo.FieldIndexOptions) <= 0;
            long count;
            if (fieldHasPositions)
            {
                count = pulsingState.TotalTermFreq;
            }
            else
            {
                count = pulsingState.DocFreq;
            }

            if (count > _maxPositions)
            {
                // Not inlined: read the wrapped reader's longs, then let it decode the rest.
                var longsSize = _fields != null ? _fields[fieldInfo.Number] : 0;
                if (pulsingState.Longs == null)
                {
                    pulsingState.Longs = new long[longsSize];
                }
                for (var i = 0; i < longsSize; i++)
                {
                    pulsingState.Longs[i] = input.ReadVLong();
                }

                pulsingState.PostingsSize                   = -1;
                pulsingState.WrappedTermState.DocFreq       = pulsingState.DocFreq;
                pulsingState.WrappedTermState.TotalTermFreq = pulsingState.TotalTermFreq;
                _wrappedPostingsReader.DecodeTerm(pulsingState.Longs, input, fieldInfo,
                                                  pulsingState.WrappedTermState,
                                                  pulsingState.Absolute);
                pulsingState.Absolute = false;
                return;
            }

            // Inlined into terms dict -- just read the byte[] blob in,
            // but don't decode it now (we only decode when a DocsEnum
            // or D&PEnum is pulled):
            pulsingState.PostingsSize = input.ReadVInt();
            bool bufferTooSmall = pulsingState.Postings == null
                                  || pulsingState.Postings.Length < pulsingState.PostingsSize;
            if (bufferTooSmall)
            {
                pulsingState.Postings = new byte[ArrayUtil.Oversize(pulsingState.PostingsSize, 1)];
            }

            // TODO: sort of silly to copy from one big byte[]
            // (the blob holding all inlined terms' blobs for
            // current term block) into another byte[] (just the
            // blob for this term)...
            input.ReadBytes(pulsingState.Postings, 0, pulsingState.PostingsSize);
            pulsingState.Absolute |= absolute;
        }
Esempio n. 23
0
        /// <summary>
        /// Reads one term's metadata from <paramref name="input"/>. Terms whose count is at most
        /// <c>maxPositions</c> are read as an inlined byte blob; all others are decoded by the
        /// wrapped postings reader.
        /// </summary>
        /// <param name="empty">Asserted to have length 0; not otherwise used.</param>
        /// <param name="input">Source of the encoded metadata.</param>
        /// <param name="fieldInfo">Supplies the index options and the field number.</param>
        /// <param name="_termState">Term state to fill in; cast to <c>PulsingTermState</c>.</param>
        /// <param name="absolute">
        /// OR-ed into the term state's <c>Absolute</c> flag, which stays set until a term is
        /// decoded by the wrapped postings reader.
        /// </param>
        public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState _termState,
            bool absolute)
        {
            PulsingTermState termState = (PulsingTermState) _termState;

            Debug.Assert(empty.Length == 0);
            termState.Absolute = termState.Absolute || absolute;
            // if we have positions, its total TF, otherwise its computed based on docFreq.
            // TODO Double check this is right..
            long count = FieldInfo.IndexOptions_e.DOCS_AND_FREQS_AND_POSITIONS.CompareTo(fieldInfo.IndexOptions) <= 0
                ? termState.TotalTermFreq
                : termState.DocFreq;
            //System.out.println("  count=" + count + " threshold=" + maxPositions);

            if (count <= maxPositions)
            {
                // Inlined into terms dict -- just read the byte[] blob in,
                // but don't decode it now (we only decode when a DocsEnum
                // or D&PEnum is pulled):
                termState.PostingsSize = input.ReadVInt();
                // Grow the per-term buffer only when the existing one is missing or too small.
                if (termState.Postings == null || termState.Postings.Length < termState.PostingsSize)
                {
                    termState.Postings = new byte[ArrayUtil.Oversize(termState.PostingsSize, 1)];
                }
                // TODO: sort of silly to copy from one big byte[]
                // (the blob holding all inlined terms' blobs for
                // current term block) into another byte[] (just the
                // blob for this term)...
                input.ReadBytes(termState.Postings, 0, termState.PostingsSize);
                //System.out.println("  inlined bytes=" + termState.postingsSize);
                termState.Absolute = termState.Absolute || absolute;
            }
            else
            {
                // Not inlined: read the wrapped reader's longs, then let it decode the rest.
                int longsSize = fields == null ? 0 : fields[fieldInfo.Number];
                if (termState.Longs == null)
                {
                    termState.Longs = new long[longsSize];
                }
                for (int i = 0; i < longsSize; i++)
                {
                    termState.Longs[i] = input.ReadVLong();
                }
                // -1 marks the term as not inlined.
                termState.PostingsSize = -1;
                termState.WrappedTermState.DocFreq = termState.DocFreq;
                termState.WrappedTermState.TotalTermFreq = termState.TotalTermFreq;
                _wrappedPostingsReader.DecodeTerm(termState.Longs, input, fieldInfo,
                    termState.WrappedTermState,
                    termState.Absolute);
                // The wrapped reader has now seen the pending absolute request.
                termState.Absolute = false;
            }
        }
Esempio n. 24
0
 /// <summary>
 /// Encodes one term's metadata: file-pointer deltas against the previously encoded term go
 /// into <paramref name="longs"/>, while the optional singleton doc ID, last position block
 /// offset and skip offset are written to <paramref name="out"/> when they are not -1.
 /// </summary>
 /// <param name="longs">Receives the doc / pos / pay start file-pointer deltas in slots 0..2.</param>
 /// <param name="out">Destination for the optional values.</param>
 /// <param name="fieldInfo">Not consulted by this implementation.</param>
 /// <param name="_state">Term state to encode; cast to <c>IntBlockTermState</c>.</param>
 /// <param name="absolute">When <c>true</c>, deltas are taken against <c>EmptyState</c>.</param>
 public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
 {
     var current = (IntBlockTermState)_state;

     if (absolute)
     {
         LastState = EmptyState;
     }
     var previous = LastState;
     bool hasPositions = FieldHasPositions;

     longs[0] = current.DocStartFP - previous.DocStartFP;
     if (hasPositions)
     {
         longs[1] = current.PosStartFP - previous.PosStartFP;
         if (FieldHasPayloads || FieldHasOffsets)
         {
             longs[2] = current.PayStartFP - previous.PayStartFP;
         }
     }

     // Each optional value is written only when it is not -1.
     if (current.SingletonDocID != -1)
     {
         @out.WriteVInt(current.SingletonDocID);
     }
     if (hasPositions && current.LastPosBlockOffset != -1)
     {
         @out.WriteVLong(current.LastPosBlockOffset);
     }
     if (current.SkipOffset != -1)
     {
         @out.WriteVLong(current.SkipOffset);
     }

     // This term becomes the base for the next delta.
     LastState = current;
 }
Esempio n. 25
0
        /// <summary>
        /// Returns a positions enum for the given term. Terms with inlined postings
        /// (<c>PostingsSize != -1</c>) get a <c>PulsingDocsAndPositionsEnum</c>; all others are
        /// served by the wrapped postings reader. In both cases the pulsing enum and the wrapped
        /// enum are linked through <c>SetOther</c> / <c>GetOther</c> so that either can be
        /// recovered from <paramref name="reuse"/> on a later call.
        /// </summary>
        /// <param name="field">Field whose postings are being read.</param>
        /// <param name="_termState">Term state; cast to <c>PulsingTermState</c>.</param>
        /// <param name="liveDocs">Passed through to the enum that is returned.</param>
        /// <param name="reuse">Optional enum to recycle; may be a pulsing or a wrapped enum.</param>
        /// <param name="flags">Passed through to the wrapped postings reader.</param>
        public override DocsAndPositionsEnum DocsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs,
            DocsAndPositionsEnum reuse,
            int flags)
        {
            var pulsingState = (PulsingTermState) _termState;
            var reusedPulsing = reuse as PulsingDocsAndPositionsEnum;

            if (pulsingState.PostingsSize == -1)
            {
                // Not inlined: the wrapped reader serves this term.
                if (reusedPulsing == null)
                {
                    return _wrappedPostingsReader.DocsAndPositions(field, pulsingState.WrappedTermState, liveDocs, reuse,
                        flags);
                }

                // 'reuse' is a pulsing enum, so offer its linked enum to the wrapped reader instead.
                DocsAndPositionsEnum inner = _wrappedPostingsReader.DocsAndPositions(field,
                    pulsingState.WrappedTermState,
                    liveDocs, (DocsAndPositionsEnum) GetOther(reuse),
                    flags);
                SetOther(inner, reuse); // inner.other = reuse
                return inner;
            }

            // Inlined: take 'reuse' itself when it is a pulsing enum, otherwise the
            // pulsing enum linked to it (the 'reuse' is then actually the wrapped enum).
            PulsingDocsAndPositionsEnum postings =
                reusedPulsing ?? (PulsingDocsAndPositionsEnum) GetOther(reuse);
            if (postings == null || !postings.CanReuse(field))
            {
                postings = new PulsingDocsAndPositionsEnum(field);
            }

            if (reuse != postings)
            {
                SetOther(postings, reuse); // postings.other = reuse
            }
            return postings.reset(liveDocs, pulsingState);
        }
Esempio n. 26
0
            /// <summary>
            /// Writes the currently pending terms as one block: a header (term count and common
            /// prefix length) followed by three length-prefixed blobs holding the term suffixes,
            /// the term stats and the postings metadata. Afterwards the last written term is
            /// remembered in <c>lastPrevTerm</c> and <c>pendingCount</c> is reset to 0.
            /// </summary>
            private void FlushBlock()
            {
                var blockOut = outerInstance.m_output;

                // Pass 1: shortest prefix that every pending term shares with the term
                // preceding this block.
                int commonPrefix = SharedPrefix(lastPrevTerm, pendingTerms[0].Term);
                for (int t = 1; t < pendingCount; t++)
                {
                    int shared = SharedPrefix(lastPrevTerm, pendingTerms[t].Term);
                    if (shared < commonPrefix)
                    {
                        commonPrefix = shared;
                    }
                }

                blockOut.WriteVInt32(pendingCount);
                blockOut.WriteVInt32(commonPrefix);

                // Pass 2: suffixes, as a separate byte[] blob.
                for (int t = 0; t < pendingCount; t++)
                {
                    var term = pendingTerms[t].Term;
                    int suffixLength = term.Length - commonPrefix;
                    // TODO: cutover to better intblock codec, instead
                    // of interleaving here:
                    bytesWriter.WriteVInt32(suffixLength);
                    bytesWriter.WriteBytes(term.Bytes, commonPrefix, suffixLength);
                }
                blockOut.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                bytesWriter.WriteTo(blockOut);
                bytesWriter.Reset();

                // Pass 3: the freqs, as a byte[] blob.
                // TODO: cutover to better intblock codec.  simple64?
                // write prefix, suffix first:
                for (int t = 0; t < pendingCount; t++)
                {
                    BlockTermState stats = pendingTerms[t].State;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(stats != null);
                    }
                    bytesWriter.WriteVInt32(stats.DocFreq);
                    if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
                    {
                        // Stored as the excess of totalTermFreq over docFreq.
                        bytesWriter.WriteVInt64(stats.TotalTermFreq - stats.DocFreq);
                    }
                }
                blockOut.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                bytesWriter.WriteTo(blockOut);
                bytesWriter.Reset();

                // Pass 4: the postings metadata. Only the first term of the block is
                // encoded with absolute == true.
                long[] metaLongs = new long[longsSize];
                bool   isFirst   = true;
                for (int t = 0; t < pendingCount; t++)
                {
                    postingsWriter.EncodeTerm(metaLongs, bufferWriter, fieldInfo, pendingTerms[t].State, isFirst);
                    for (int k = 0; k < longsSize; k++)
                    {
                        bytesWriter.WriteVInt64(metaLongs[k]);
                    }
                    bufferWriter.WriteTo(bytesWriter);
                    bufferWriter.Reset();
                    isFirst = false;
                }
                blockOut.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                bytesWriter.WriteTo(blockOut);
                bytesWriter.Reset();

                // The next block's prefix is computed against the last term of this one.
                lastPrevTerm.CopyBytes(pendingTerms[pendingCount - 1].Term);
                pendingCount = 0;
            }
        /// <summary>
        /// Returns a positions enum for the given term. Fields with payloads or offsets get a
        /// <c>SegmentFullPositionsEnum</c>; all other fields get a
        /// <c>SegmentDocsAndPositionsEnum</c>. <paramref name="reuse"/> is recycled only when it
        /// is of the chosen type and its <c>StartFreqIn</c> is this reader's <c>FreqIn</c>.
        /// </summary>
        /// <param name="fieldInfo">Field whose postings are being read.</param>
        /// <param name="termState">Term state; cast to <c>StandardTermState</c>.</param>
        /// <param name="liveDocs">Passed through to the enum's <c>Reset</c>.</param>
        /// <param name="reuse">Optional enum to recycle.</param>
        /// <param name="flags">Not consulted by this implementation.</param>
        public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
        {
            bool hasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

            // TODO: can we optimize if FLAG_PAYLOADS / FLAG_OFFSETS
            // isn't passed?

            // TODO: refactor
            if (!fieldInfo.HasPayloads() && !hasOffsets)
            {
                // A reused enum whose freq input is not ours could have come from another
                // reader also using the standard codec (e.g. with ParallelReader), so it is replaced.
                var positionsEnum = reuse as SegmentDocsAndPositionsEnum;
                if (positionsEnum == null || positionsEnum.StartFreqIn != FreqIn)
                {
                    positionsEnum = new SegmentDocsAndPositionsEnum(this, FreqIn, ProxIn);
                }
                return positionsEnum.Reset(fieldInfo, (StandardTermState)termState, liveDocs);
            }

            // Same reuse rule as above, for the enum that also handles payloads/offsets.
            var fullEnum = reuse as SegmentFullPositionsEnum;
            if (fullEnum == null || fullEnum.StartFreqIn != FreqIn)
            {
                fullEnum = new SegmentFullPositionsEnum(this, FreqIn, ProxIn);
            }
            return fullEnum.Reset(fieldInfo, (StandardTermState)termState, liveDocs);
        }
 /// <summary>
 /// Writes one term's postings metadata to <paramref name="out"/> as deltas against the
 /// previously encoded term (or against <c>EmptyState</c> when <paramref name="absolute"/> is set).
 /// </summary>
 /// <remarks>
 /// Neither <paramref name="empty"/> nor <paramref name="fieldInfo"/> is read here.
 /// </remarks>
 public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
 {
     StandardTermState current = (StandardTermState)_state;
     if (absolute)
     {
         // Restart the delta chain from the empty state.
         LastState = EmptyState;
     }

     long freqDelta = current.FreqStart - LastState.FreqStart;
     @out.WriteVLong(freqDelta);

     // A skip offset of -1 means "no skip data"; nothing is written in that case.
     long skipOffset = current.SkipOffset;
     if (skipOffset != -1)
     {
         Debug.Assert(skipOffset > 0);
         @out.WriteVLong(skipOffset);
     }

     bool writesPositions = IndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
     if (writesPositions)
     {
         long proxDelta = current.ProxStart - LastState.ProxStart;
         @out.WriteVLong(proxDelta);
     }

     LastState = current;
 }
Esempio n. 29
0
        /// <summary>
        /// Encodes one term's metadata: file-pointer deltas go into <paramref name="longs"/>,
        /// the optional values (singleton doc id, last position block offset, skip offset)
        /// go to <paramref name="out"/>.
        /// </summary>
        /// <remarks>
        /// Deltas are taken against the previously encoded term, or against <c>EmptyState</c>
        /// when <paramref name="absolute"/> is set. Optional values equal to -1 are not written.
        /// <paramref name="fieldInfo"/> is not read here.
        /// </remarks>
        public override void EncodeTerm(long[] longs, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
        {
            IntBlockTermState current = (IntBlockTermState)_state;

            if (absolute)
            {
                // Restart the delta chain from the empty state.
                LastState = EmptyState;
            }

            // Slot 0: doc file pointer; slot 1: positions; slot 2: payloads/offsets.
            longs[0] = current.DocStartFP - LastState.DocStartFP;
            if (FieldHasPositions)
            {
                longs[1] = current.PosStartFP - LastState.PosStartFP;
                if (FieldHasPayloads || FieldHasOffsets)
                {
                    longs[2] = current.PayStartFP - LastState.PayStartFP;
                }
            }

            int singleton = current.SingletonDocID;
            if (singleton != -1)
            {
                @out.WriteVInt(singleton);
            }

            if (FieldHasPositions && current.LastPosBlockOffset != -1)
            {
                @out.WriteVLong(current.LastPosBlockOffset);
            }

            if (current.SkipOffset != -1)
            {
                @out.WriteVLong(current.SkipOffset);
            }

            LastState = current;
        }
Esempio n. 30
0
        /// <summary>
        /// Reads one term's postings metadata from <paramref name="in"/> into
        /// <paramref name="termState"/>.
        /// </summary>
        /// <remarks>
        /// The freq and prox offsets are accumulated as deltas; when <paramref name="absolute"/>
        /// is set they are first reset to zero. <paramref name="longs"/> is not read here.
        /// </remarks>
        public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState termState, bool absolute)
        {
            StandardTermState decoded = (StandardTermState)termState;

            if (absolute)
            {
                decoded.freqOffset = 0;
                decoded.proxOffset = 0;
            }

            long freqDelta = @in.ReadVInt64();
            decoded.freqOffset += freqDelta;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(decoded.freqOffset < freqIn.Length);
            }

            // Below skipMinimum docs no skip offset is stored; skipOffset is left untouched (undefined).
            if (decoded.DocFreq >= skipMinimum)
            {
                decoded.skipOffset = @in.ReadVInt64();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(decoded.freqOffset + decoded.skipOffset < freqIn.Length);
                }
            }

            // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
            bool hasPositions = IndexOptionsComparer.Default.Compare(fieldInfo.IndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            if (hasPositions)
            {
                long proxDelta = @in.ReadVInt64();
                decoded.proxOffset += proxDelta;
            }
        }
Esempio n. 31
0
        /// <summary>
        /// Returns a <c>SepDocsAndPositionsEnum</c> initialised for the given term, recycling
        /// <paramref name="reuse"/> when it is of that type and was opened on this reader's doc input.
        /// </summary>
        /// <remarks>
        /// <paramref name="flags"/> is not consulted by this method.
        /// </remarks>
        public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState bTermState,
            Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
        {
            Debug.Assert(fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
            SepTermState sepState = (SepTermState)bTermState;

            SepDocsAndPositionsEnum candidate = reuse as SepDocsAndPositionsEnum;

            // A recycled enum may have come from another reader that also uses the sep codec
            // (e.g. with ParallelReader); such an enum cannot be reused here.
            bool needsFresh = candidate == null || candidate.START_DOC_IN != _docIn;
            SepDocsAndPositionsEnum postings = needsFresh ? new SepDocsAndPositionsEnum(this) : candidate;

            return postings.Init(fieldInfo, sepState, liveDocs);
        }
 /// <summary>
 /// Returns a docs enum for the given term: <paramref name="reuse"/> is reset and returned
 /// when <c>CanReuse</c> accepts it, otherwise a new enum is created via <c>NewDocsEnum</c>.
 /// </summary>
 /// <remarks>
 /// <paramref name="flags"/> is not consulted by this method.
 /// </remarks>
 public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags)
 {
     if (!CanReuse(reuse, liveDocs))
     {
         return NewDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
     }

     SegmentDocsEnumBase recycled = (SegmentDocsEnumBase)reuse;
     return recycled.Reset(fieldInfo, (StandardTermState)termState);
 }
        /// <summary>
        /// Decodes one term's metadata into <paramref name="_termState"/>: file pointers come
        /// from <paramref name="longs"/> as deltas, optional values are read from <paramref name="in"/>.
        /// </summary>
        /// <remarks>
        /// When <paramref name="absolute"/> is set the three file pointers are reset to zero first.
        /// Segments written before <c>VERSION_META_ARRAY</c> are handed to <c>_decodeTerm</c> instead.
        /// Optional values that are not present are set to -1.
        /// </remarks>
        public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState _termState, bool absolute)
        {
            var decoded = (Lucene41PostingsWriter.IntBlockTermState)_termState;
            bool hasPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            bool hasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            bool hasPayloads = fieldInfo.HasPayloads();

            if (absolute)
            {
                decoded.DocStartFP = 0;
                decoded.PosStartFP = 0;
                decoded.PayStartFP = 0;
            }

            // backward compatibility
            if (Version < Lucene41PostingsWriter.VERSION_META_ARRAY)
            {
                _decodeTerm(@in, fieldInfo, decoded);
                return;
            }

            decoded.DocStartFP += longs[0];
            if (hasPositions)
            {
                decoded.PosStartFP += longs[1];
                if (hasOffsets || hasPayloads)
                {
                    decoded.PayStartFP += longs[2];
                }
            }

            // A singleton doc id is stored only for terms that occur in exactly one document.
            decoded.SingletonDocID = decoded.DocFreq == 1 ? @in.ReadVInt() : -1;

            if (hasPositions)
            {
                // Stored only when the term has more positions than fit in a single block.
                decoded.LastPosBlockOffset = decoded.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE
                    ? @in.ReadVLong()
                    : -1;
            }

            // Stored only when the term has more docs than fit in a single block.
            decoded.SkipOffset = decoded.DocFreq > Lucene41PostingsFormat.BLOCK_SIZE
                ? @in.ReadVLong()
                : -1;
        }
Esempio n. 34
0
        /// <summary>
        /// Called when we are done adding docs to this term. Flushes the doc/freq and position
        /// entries still sitting in the buffers as vInts, optionally writes skip data, stores
        /// the resulting pointers and offsets on <paramref name="_state"/> and resets the
        /// per-term counters.
        /// </summary>
        public override void FinishTerm(BlockTermState _state)
        {
            IntBlockTermState termState = (IntBlockTermState)_state;

            Debug.Assert(termState.DocFreq > 0);

            // TODO: wasteful we are counting this (counting # docs
            // for this term) in two places?
            Debug.Assert(termState.DocFreq == DocCount, termState.DocFreq + " vs " + DocCount);

            // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
            int singletonDocID = -1;
            if (termState.DocFreq == 1)
            {
                // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
                singletonDocID = DocDeltaBuffer[0];
            }
            else
            {
                // vInt encode the remaining doc deltas and freqs:
                for (int slot = 0; slot < DocBufferUpto; slot++)
                {
                    int docDelta = DocDeltaBuffer[slot];
                    int freq     = FreqBuffer[slot];
                    if (FieldHasFreqs)
                    {
                        if (freq == 1)
                        {
                            // Low bit set: freq is 1 and is not written separately.
                            DocOut.WriteVInt((docDelta << 1) | 1);
                        }
                        else
                        {
                            DocOut.WriteVInt(docDelta << 1);
                            DocOut.WriteVInt(freq);
                        }
                    }
                    else
                    {
                        DocOut.WriteVInt(docDelta);
                    }
                }
            }

            long lastPosBlockOffset = -1;
            if (FieldHasPositions)
            {
                // totalTermFreq is just total number of positions(or payloads, or offsets)
                // associated with current term.
                Debug.Assert(termState.TotalTermFreq != -1);
                if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
                {
                    // record file offset for last pos in last block
                    lastPosBlockOffset = PosOut.FilePointer - PosStartFP;
                }

                if (PosBufferUpto > 0)
                {
                    // TODO: should we send offsets/payloads to
                    // .pay...?  seems wasteful (have to store extra
                    // vLong for low (< BLOCK_SIZE) DF terms = vast vast
                    // majority)

                    // vInt encode the remaining positions/payloads/offsets:
                    int prevPayloadLength = -1; // force first payload length to be written
                    int prevOffsetLength  = -1; // force first offset length to be written
                    int payloadCursor     = 0;
                    for (int slot = 0; slot < PosBufferUpto; slot++)
                    {
                        int posDelta = PosDeltaBuffer[slot];
                        if (!FieldHasPayloads)
                        {
                            PosOut.WriteVInt(posDelta);
                        }
                        else
                        {
                            int payloadLength = PayloadLengthBuffer[slot];
                            if (payloadLength == prevPayloadLength)
                            {
                                PosOut.WriteVInt(posDelta << 1);
                            }
                            else
                            {
                                // Low bit set: a new payload length follows.
                                prevPayloadLength = payloadLength;
                                PosOut.WriteVInt((posDelta << 1) | 1);
                                PosOut.WriteVInt(payloadLength);
                            }

                            if (payloadLength != 0)
                            {
                                PosOut.WriteBytes(PayloadBytes, payloadCursor, payloadLength);
                                payloadCursor += payloadLength;
                            }
                        }

                        if (FieldHasOffsets)
                        {
                            int startDelta = OffsetStartDeltaBuffer[slot];
                            int length     = OffsetLengthBuffer[slot];
                            if (length != prevOffsetLength)
                            {
                                // Low bit set: a new offset length follows.
                                PosOut.WriteVInt((startDelta << 1) | 1);
                                PosOut.WriteVInt(length);
                                prevOffsetLength = length;
                            }
                            else
                            {
                                PosOut.WriteVInt(startDelta << 1);
                            }
                        }
                    }

                    if (FieldHasPayloads)
                    {
                        Debug.Assert(payloadCursor == PayloadByteUpto);
                        PayloadByteUpto = 0;
                    }
                }
            }

            // Skip data is only written when the term has more docs than fit in one block.
            long skipOffset = DocCount > Lucene41PostingsFormat.BLOCK_SIZE
                ? SkipWriter.WriteSkip(DocOut) - DocStartFP
                : -1;

            termState.DocStartFP         = DocStartFP;
            termState.PosStartFP         = PosStartFP;
            termState.PayStartFP         = PayStartFP;
            termState.SingletonDocID     = singletonDocID;
            termState.SkipOffset         = skipOffset;
            termState.LastPosBlockOffset = lastPosBlockOffset;

            // Reset per-term bookkeeping for the next term.
            DocBufferUpto = 0;
            PosBufferUpto = 0;
            LastDocID     = 0;
            DocCount      = 0;
        }
Esempio n. 35
0
        /// <summary>
        /// Writes one term's postings metadata to <paramref name="out"/> as deltas against the
        /// previously encoded term (or against <c>emptyState</c> when <paramref name="absolute"/> is set).
        /// </summary>
        /// <remarks>
        /// The freq start delta is always written; the skip offset is written only when it is
        /// not -1; the prox start delta is written only when positions are indexed.
        /// Neither <paramref name="empty"/> nor <paramref name="fieldInfo"/> is read here.
        /// </remarks>
        public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute)
        {
            StandardTermState state_ = (StandardTermState)state;

            if (absolute)
            {
                // Restart the delta chain from the empty state.
                lastState = emptyState;
            }
            @out.WriteVInt64(state_.FreqStart - lastState.FreqStart);
            if (state_.SkipOffset != -1)
            {
                Debug.Assert(state_.SkipOffset > 0);
                @out.WriteVInt64(state_.SkipOffset);
            }
            // Relational operator instead of Enum.CompareTo(object): same ordering on the
            // underlying enum values, but without boxing on every encoded term.
            if (indexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                @out.WriteVInt64(state_.ProxStart - lastState.ProxStart);
            }
            lastState = state_;
        }
Esempio n. 36
0
        /// <summary>
        /// Reads one term's postings metadata from <paramref name="in"/> into
        /// <paramref name="termState"/>.
        /// </summary>
        /// <remarks>
        /// The freq and prox offsets are accumulated as deltas; when <paramref name="absolute"/>
        /// is set they are first reset to zero. The skip offset is read only for terms with at
        /// least <c>skipMinimum</c> docs and is otherwise left untouched.
        /// <paramref name="longs"/> is not read here.
        /// </remarks>
        public override void DecodeTerm(long[] longs, DataInput @in, FieldInfo fieldInfo, BlockTermState termState, bool absolute)
        {
            StandardTermState termState2 = (StandardTermState)termState;

            if (absolute)
            {
                termState2.freqOffset = 0;
                termState2.proxOffset = 0;
            }

            termState2.freqOffset += @in.ReadVInt64();
            Debug.Assert(termState2.freqOffset < freqIn.Length);

            if (termState2.DocFreq >= skipMinimum)
            {
                termState2.skipOffset = @in.ReadVInt64();
                Debug.Assert(termState2.freqOffset + termState2.skipOffset < freqIn.Length);
            }
            // else: skipOffset is undefined for terms below skipMinimum docs.

            // Relational operator instead of Enum.CompareTo(object): same ordering on the
            // underlying enum values, but without boxing on every decoded term.
            if (fieldInfo.IndexOptions >= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                termState2.proxOffset += @in.ReadVInt64();
            }
        }
Esempio n. 37
0
        /// <summary>
        /// Reads one term's sep-codec metadata from <paramref name="input"/> into
        /// <paramref name="bTermState"/>: the doc index, then (depending on the field's index
        /// options) the freq index, pos index and payload pointer, and finally the skip pointer.
        /// </summary>
        /// <remarks>
        /// Payload and skip pointers are stored as deltas unless <paramref name="absolute"/> is set.
        /// <paramref name="empty"/> is not read here.
        /// </remarks>
        public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState bTermState,
                                        bool absolute)
        {
            SepTermState sepState = (SepTermState)bTermState;

            sepState.DOC_INDEX.Read(input, absolute);

            bool hasFreqs = fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
            if (hasFreqs)
            {
                sepState.FREQ_INDEX.Read(input, absolute);

                bool hasPositions = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                if (hasPositions)
                {
                    sepState.POS_INDEX.Read(input, absolute);

                    if (fieldInfo.HasPayloads())
                    {
                        long payloadValue = input.ReadVLong();
                        sepState.PAYLOAD_FP = absolute ? payloadValue : sepState.PAYLOAD_FP + payloadValue;
                    }
                }
            }

            if (sepState.DocFreq < _skipMinimum)
            {
                // No skip pointer is stored for this term; only an absolute decode resets it.
                if (absolute)
                {
                    sepState.SKIP_FP = 0;
                }
                return;
            }

            long skipValue = input.ReadVLong();
            sepState.SKIP_FP = absolute ? skipValue : sepState.SKIP_FP + skipValue;
        }
Esempio n. 38
0
        /// <summary>
        /// Writes one term's postings metadata to <paramref name="out"/>: the freq start delta,
        /// the skip offset (unless it is -1) and, when positions are indexed, the prox start delta.
        /// </summary>
        /// <remarks>
        /// Deltas are relative to the previously encoded term; <paramref name="absolute"/> makes
        /// them relative to <c>EmptyState</c> instead. Neither <paramref name="empty"/> nor
        /// <paramref name="fieldInfo"/> is read here.
        /// </remarks>
        public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)
        {
            StandardTermState termState = (StandardTermState)_state;

            if (absolute)
            {
                LastState = EmptyState;
            }

            @out.WriteVLong(termState.FreqStart - LastState.FreqStart);

            bool hasSkipData = termState.SkipOffset != -1;
            if (hasSkipData)
            {
                Debug.Assert(termState.SkipOffset > 0);
                @out.WriteVLong(termState.SkipOffset);
            }

            bool hasPositions = IndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            if (hasPositions)
            {
                @out.WriteVLong(termState.ProxStart - LastState.ProxStart);
            }

            // The term just written becomes the base for the next delta.
            LastState = termState;
        }
Esempio n. 39
0
        /// <summary>
        /// Called when we are done adding docs to this term. Flushes the doc/freq and position
        /// entries still sitting in the buffers as vInts, optionally writes skip data, stores
        /// the resulting pointers and offsets on <paramref name="state"/> and resets the
        /// per-term counters.
        /// </summary>
        public override void FinishTerm(BlockTermState state)
        {
            Int32BlockTermState termState = (Int32BlockTermState)state;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termState.DocFreq > 0);

                // TODO: wasteful we are counting this (counting # docs
                // for this term) in two places?
                Debugging.Assert(termState.DocFreq == docCount, "{0} vs {1}", termState.DocFreq, docCount);
            }

            // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
            bool isSingleton = termState.DocFreq == 1;
            int singletonDocID;
            if (isSingleton)
            {
                // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
                singletonDocID = docDeltaBuffer[0];
            }
            else
            {
                singletonDocID = -1;

                // vInt encode the remaining doc deltas and freqs:
                for (int idx = 0; idx < docBufferUpto; idx++)
                {
                    int docDelta = docDeltaBuffer[idx];
                    int freq     = freqBuffer[idx];
                    if (!fieldHasFreqs)
                    {
                        docOut.WriteVInt32(docDelta);
                        continue;
                    }

                    if (freq == 1)
                    {
                        // Low bit set: freq is 1 and is not written separately.
                        docOut.WriteVInt32((docDelta << 1) | 1);
                    }
                    else
                    {
                        docOut.WriteVInt32(docDelta << 1);
                        docOut.WriteVInt32(freq);
                    }
                }
            }

            long lastPosBlockOffset = -1;
            if (fieldHasPositions)
            {
                // totalTermFreq is just total number of positions(or payloads, or offsets)
                // associated with current term.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termState.TotalTermFreq != -1);
                }

                if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
                {
                    // record file offset for last pos in last block
                    lastPosBlockOffset = posOut.Position - posStartFP; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                }

                if (posBufferUpto > 0)
                {
                    // TODO: should we send offsets/payloads to
                    // .pay...?  seems wasteful (have to store extra
                    // vLong for low (< BLOCK_SIZE) DF terms = vast vast
                    // majority)

                    // vInt encode the remaining positions/payloads/offsets:
                    int previousPayloadLength = -1; // force first payload length to be written
                    int previousOffsetLength  = -1; // force first offset length to be written
                    int payloadReadPos        = 0;
                    for (int idx = 0; idx < posBufferUpto; idx++)
                    {
                        int posDelta = posDeltaBuffer[idx];
                        if (fieldHasPayloads)
                        {
                            int payloadLength = payloadLengthBuffer[idx];
                            bool lengthChanged = payloadLength != previousPayloadLength;
                            if (lengthChanged)
                            {
                                // Low bit set: a new payload length follows.
                                previousPayloadLength = payloadLength;
                                posOut.WriteVInt32((posDelta << 1) | 1);
                                posOut.WriteVInt32(payloadLength);
                            }
                            else
                            {
                                posOut.WriteVInt32(posDelta << 1);
                            }

                            if (payloadLength != 0)
                            {
                                posOut.WriteBytes(payloadBytes, payloadReadPos, payloadLength);
                                payloadReadPos += payloadLength;
                            }
                        }
                        else
                        {
                            posOut.WriteVInt32(posDelta);
                        }

                        if (fieldHasOffsets)
                        {
                            int startDelta   = offsetStartDeltaBuffer[idx];
                            int offsetLength = offsetLengthBuffer[idx];
                            if (offsetLength != previousOffsetLength)
                            {
                                // Low bit set: a new offset length follows.
                                posOut.WriteVInt32((startDelta << 1) | 1);
                                posOut.WriteVInt32(offsetLength);
                                previousOffsetLength = offsetLength;
                            }
                            else
                            {
                                posOut.WriteVInt32(startDelta << 1);
                            }
                        }
                    }

                    if (fieldHasPayloads)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(payloadReadPos == payloadByteUpto);
                        }
                        payloadByteUpto = 0;
                    }
                }
            }

            // Skip data is only written when the term has more docs than fit in one block.
            long skipOffset = -1;
            if (docCount > Lucene41PostingsFormat.BLOCK_SIZE)
            {
                skipOffset = skipWriter.WriteSkip(docOut) - docStartFP;
            }

            termState.docStartFP         = docStartFP;
            termState.posStartFP         = posStartFP;
            termState.payStartFP         = payStartFP;
            termState.singletonDocID     = singletonDocID;
            termState.skipOffset         = skipOffset;
            termState.lastPosBlockOffset = lastPosBlockOffset;

            // Reset per-term bookkeeping for the next term.
            docBufferUpto = 0;
            posBufferUpto = 0;
            lastDocID     = 0;
            docCount      = 0;
        }
Esempio n. 40
0
        /// <summary>
        /// Reads one term's sep-codec metadata from <paramref name="input"/> into
        /// <paramref name="termState"/>: the doc index, then (depending on the field's index
        /// options) the freq index, pos index and payload pointer, and finally the skip pointer.
        /// </summary>
        /// <remarks>
        /// Payload and skip pointers are accumulated as deltas unless <paramref name="absolute"/>
        /// is set, in which case the value read replaces the old one.
        /// <paramref name="empty"/> is not read here.
        /// </remarks>
        public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState termState,
                                        bool absolute)
        {
            SepTermState sepState = (SepTermState)termState;

            sepState.docIndex.Read(input, absolute);
            if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
            {
                sepState.freqIndex.Read(input, absolute);
                if (fieldInfo.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    sepState.posIndex.Read(input, absolute);
                    if (fieldInfo.HasPayloads)
                    {
                        // Absolute decode starts from zero, otherwise from the previous pointer.
                        long payloadBase = absolute ? 0 : sepState.payloadFP;
                        sepState.payloadFP = payloadBase + input.ReadVInt64();
                    }
                }
            }

            bool hasSkipPointer = sepState.DocFreq >= skipMinimum;
            if (hasSkipPointer)
            {
                long skipBase = absolute ? 0 : sepState.skipFP;
                sepState.skipFP = skipBase + input.ReadVInt64();
            }
            else if (absolute)
            {
                // Nothing stored for this term; an absolute decode clears the pointer.
                sepState.skipFP = 0;
            }
        }
Esempio n. 41
0
        /// <summary>
        /// Writes one term's postings metadata to <paramref name="out"/>: the freq start delta,
        /// the skip offset (unless it is -1) and, when positions are indexed, the prox start delta.
        /// </summary>
        /// <remarks>
        /// Deltas are relative to the previously encoded term; <paramref name="absolute"/> makes
        /// them relative to <c>emptyState</c> instead. Neither <paramref name="empty"/> nor
        /// <paramref name="fieldInfo"/> is read here.
        /// </remarks>
        public override void EncodeTerm(long[] empty, DataOutput @out, FieldInfo fieldInfo, BlockTermState state, bool absolute)
        {
            StandardTermState current = (StandardTermState)state;

            if (absolute)
            {
                lastState = emptyState;
            }

            long freqDelta = current.FreqStart - lastState.FreqStart;
            @out.WriteVInt64(freqDelta);

            if (current.SkipOffset != -1)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(current.SkipOffset > 0);
                }
                @out.WriteVInt64(current.SkipOffset);
            }

            // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
            bool hasPositions = IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            if (hasPositions)
            {
                long proxDelta = current.ProxStart - lastState.ProxStart;
                @out.WriteVInt64(proxDelta);
            }

            // The term just written becomes the base for the next delta.
            lastState = current;
        }
Esempio n. 42
0
        /// <summary>
        /// Writes the metadata of a single term to <paramref name="output"/>.
        /// When the term state carries a non-null <c>bytes</c> array, its length and
        /// contents are written directly. Otherwise the wrapped postings writer encodes
        /// its own state into <c>_longs</c> and <c>_buffer</c>, which are then copied to
        /// <paramref name="output"/>.
        /// </summary>
        /// <param name="empty">Asserted to have length zero; otherwise not read.</param>
        /// <param name="output">Destination the encoded values are written to.</param>
        /// <param name="fieldInfo">Passed through to the wrapped postings writer.</param>
        /// <param name="state">Term state to encode; cast to <c>PulsingTermState</c>.</param>
        /// <param name="abs">
        /// OR-ed into the <c>_absolute</c> flag, which is what the wrapped writer receives
        /// and which is cleared only after the wrapped writer has been invoked.
        /// </param>
        public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState state,
                                        bool abs)
        {
            var pulsingState = (PulsingTermState)state;

            Debug.Assert(empty.Length == 0);

            // Keep the flag set until the wrapped writer has actually consumed it.
            _absolute |= abs;

            var inlinedBytes = pulsingState.bytes;
            if (inlinedBytes != null)
            {
                // Term state carries its own byte array: write length-prefixed bytes as-is.
                output.WriteVInt32(inlinedBytes.Length);
                output.WriteBytes(inlinedBytes, 0, inlinedBytes.Length);

                // abs was already folded into _absolute above, so this leaves the flag unchanged.
                _absolute |= abs;
                return;
            }

            // No byte array on the term state: let the wrapped writer encode, then copy its output.
            _wrappedPostingsWriter.EncodeTerm(_longs, _buffer, fieldInfo, pulsingState.wrappedState, _absolute);

            for (var slot = 0; slot < _longsSize; slot++)
            {
                output.WriteVInt64(_longs[slot]);
            }

            _buffer.WriteTo(output);
            _buffer.Reset();
            _absolute = false;
        }
Esempio n. 43
0
        /// <summary>
        /// Reads the metadata of a single term from <paramref name="input"/> into
        /// <paramref name="bTermState"/>. The doc index is always read; the freq index is
        /// read unless the field is <c>DOCS_ONLY</c>; the position index (and, if the
        /// field has payloads, the payload pointer) is read for
        /// <c>DOCS_AND_FREQS_AND_POSITIONS</c>. The skip pointer is read only when the
        /// term's <c>DocFreq</c> is at least <c>_skipMinimum</c>.
        /// </summary>
        /// <param name="empty">Not read by this method.</param>
        /// <param name="input">Source the encoded values are read from.</param>
        /// <param name="fieldInfo">Supplies the index options and payload flag that select what is read.</param>
        /// <param name="bTermState">Term state to populate; cast to <c>SepTermState</c>.</param>
        /// <param name="absolute">
        /// When <c>true</c>, pointers read here replace the stored values; otherwise the
        /// value read is added to the stored value.
        /// </param>
        public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState bTermState,
            bool absolute)
        {
            var sepState = (SepTermState) bTermState;
            var indexOptions = fieldInfo.FieldIndexOptions;

            sepState.DOC_INDEX.Read(input, absolute);

            if (indexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
            {
                sepState.FREQ_INDEX.Read(input, absolute);

                if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    sepState.POS_INDEX.Read(input, absolute);

                    if (fieldInfo.HasPayloads())
                    {
                        // Value read is the pointer itself (absolute) or an increment on the stored one.
                        var payloadValue = input.ReadVLong();
                        sepState.PAYLOAD_FP = absolute ? payloadValue : sepState.PAYLOAD_FP + payloadValue;
                    }
                }
            }

            if (sepState.DocFreq < _skipMinimum)
            {
                // Nothing is read for the skip pointer here; an absolute decode clears it.
                if (absolute)
                {
                    sepState.SKIP_FP = 0;
                }
                return;
            }

            var skipValue = input.ReadVLong();
            sepState.SKIP_FP = absolute ? skipValue : sepState.SKIP_FP + skipValue;
        }