Beispiel #1
0
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(stats.DocFreq > 0);
                }
                //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);

                bool isIndexTerm = fieldIndexWriter.CheckIndexTerm(text, stats);

                if (isIndexTerm)
                {
                    if (pendingCount > 0)
                    {
                        // Instead of writing each term, live, we gather terms
                        // in RAM in a pending buffer, and then write the
                        // entire block in between index terms:
                        FlushBlock();
                    }
                    fieldIndexWriter.Add(text, stats, outerInstance.m_output.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    //System.out.println("  index term!");
                }

                if (pendingTerms.Length == pendingCount)
                {
                    TermEntry[] newArray = new TermEntry[ArrayUtil.Oversize(pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                    System.Array.Copy(pendingTerms, 0, newArray, 0, pendingCount);
                    for (int i = pendingCount; i < newArray.Length; i++)
                    {
                        newArray[i] = new TermEntry();
                    }
                    pendingTerms = newArray;
                }
                TermEntry te = pendingTerms[pendingCount];

                te.Term.CopyBytes(text);
                te.State               = postingsWriter.NewTermState();
                te.State.DocFreq       = stats.DocFreq;
                te.State.TotalTermFreq = stats.TotalTermFreq;
                postingsWriter.FinishTerm(te.State);

                pendingCount++;
                numTerms++;
            }
Beispiel #2
0
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                Debug.Assert(stats.DocFreq > 0);

                var isIndexTerm = _fieldIndexWriter.CheckIndexTerm(text, stats);

                if (isIndexTerm)
                {
                    if (_pendingCount > 0)
                    {
                        // Instead of writing each term, live, we gather terms
                        // in RAM in a pending buffer, and then write the
                        // entire block in between index terms:
                        FlushBlock();
                    }
                    _fieldIndexWriter.Add(text, stats, _btw._output.FilePointer);
                }

                if (_pendingTerms.Length == _pendingCount)
                {
                    var newArray =
                        new TermEntry[ArrayUtil.Oversize(_pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                    Array.Copy(_pendingTerms, 0, newArray, 0, _pendingCount);
                    for (var i = _pendingCount; i < newArray.Length; i++)
                    {
                        newArray[i] = new TermEntry();
                    }
                    _pendingTerms = newArray;
                }
                var te = _pendingTerms[_pendingCount];

                te.Term.CopyBytes(text);
                te.State               = _postingsWriter.NewTermState();
                te.State.DocFreq       = stats.DocFreq;
                te.State.TotalTermFreq = stats.TotalTermFreq;
                _postingsWriter.FinishTerm(te.State);

                _pendingCount++;
                _numTerms++;
            }
Beispiel #3
0
        /// <summary>
        /// Called when we are done adding docs to this term
        /// </summary>
        /// <param name="state"></param>
        public override void FinishTerm(BlockTermState state)
        {
            var state2 = (PulsingTermState)state;

            Debug.Assert(_pendingCount > 0 || _pendingCount == -1);

            if (_pendingCount == -1)
            {
                state2.wrappedState.DocFreq       = state2.DocFreq;
                state2.wrappedState.TotalTermFreq = state2.TotalTermFreq;
                state2.bytes = null;
                _wrappedPostingsWriter.FinishTerm(state2.wrappedState);
            }
            else
            {
                // There were few enough total occurrences for this
                // term, so we fully inline our postings data into
                // terms dict, now:

                // TODO: it'd be better to share this encoding logic
                // in some inner codec that knows how to write a
                // single doc / single position, etc.  This way if a
                // given codec wants to store other interesting
                // stuff, it could use this pulsing codec to do so

                if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
                {
                    var lastDocID         = 0;
                    var pendingIDX        = 0;
                    var lastPayloadLength = -1;
                    var lastOffsetLength  = -1;
                    while (pendingIDX < _pendingCount)
                    {
                        var doc = _pending[pendingIDX];

                        var delta = doc.docID - lastDocID;
                        lastDocID = doc.docID;

                        // if (DEBUG) System.out.println("  write doc=" + doc.docID + " freq=" + doc.termFreq);

                        if (doc.termFreq == 1)
                        {
                            _buffer.WriteVInt32((delta << 1) | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt32(delta << 1);
                            _buffer.WriteVInt32(doc.termFreq);
                        }

                        var lastPos    = 0;
                        var lastOffset = 0;
                        for (var posIDX = 0; posIDX < doc.termFreq; posIDX++)
                        {
                            var pos = _pending[pendingIDX++];
                            Debug.Assert(pos.docID == doc.docID);
                            var posDelta = pos.pos - lastPos;
                            lastPos = pos.pos;

                            var payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                            if (_storePayloads)
                            {
                                if (payloadLength != lastPayloadLength)
                                {
                                    _buffer.WriteVInt32((posDelta << 1) | 1);
                                    _buffer.WriteVInt32(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                {
                                    _buffer.WriteVInt32(posDelta << 1);
                                }
                            }
                            else
                            {
                                _buffer.WriteVInt32(posDelta);
                            }

                            if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
                            {
                                //System.out.println("write=" + pos.startOffset + "," + pos.endOffset);
                                var offsetDelta  = pos.startOffset - lastOffset;
                                var offsetLength = pos.endOffset - pos.startOffset;
                                if (offsetLength != lastOffsetLength)
                                {
                                    _buffer.WriteVInt32(offsetDelta << 1 | 1);
                                    _buffer.WriteVInt32(offsetLength);
                                }
                                else
                                {
                                    _buffer.WriteVInt32(offsetDelta << 1);
                                }
                                lastOffset       = pos.startOffset;
                                lastOffsetLength = offsetLength;
                            }

                            if (payloadLength > 0)
                            {
                                Debug.Assert(_storePayloads);
                                _buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                            }
                        }
                    }
                }
                else if (_indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    int lastDocId = 0;
                    for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
                    {
                        Position doc   = _pending[posIdx];
                        int      delta = doc.docID - lastDocId;

                        Debug.Assert(doc.termFreq != 0);

                        if (doc.termFreq == 1)
                        {
                            _buffer.WriteVInt32((delta << 1) | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt32(delta << 1);
                            _buffer.WriteVInt32(doc.termFreq);
                        }
                        lastDocId = doc.docID;
                    }
                }
                else if (_indexOptions == IndexOptions.DOCS_ONLY)
                {
                    int lastDocId = 0;
                    for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
                    {
                        Position doc = _pending[posIdx];
                        _buffer.WriteVInt32(doc.docID - lastDocId);
                        lastDocId = doc.docID;
                    }
                }

                state2.bytes = new byte[(int)_buffer.GetFilePointer()];
                _buffer.WriteTo(state2.bytes, 0);
                _buffer.Reset();
            }
            _pendingCount = 0;
        }