Пример #1
0
        /// <summary>
        /// Writes the metadata of a single term to <paramref name="output"/>.
        /// <para/>
        /// If the term state carries a byte array, that array is written as a
        /// VInt32 length followed by the raw bytes. Otherwise the wrapped postings
        /// writer encodes the term into the internal longs/buffer pair, which is
        /// then copied to <paramref name="output"/>.
        /// </summary>
        /// <param name="empty">Must be a zero-length array (asserted); it is not written to.</param>
        /// <param name="output">Destination for the encoded term metadata.</param>
        /// <param name="fieldInfo">Field description, forwarded to the wrapped postings writer.</param>
        /// <param name="state">Term state; cast to <c>PulsingTermState</c>.</param>
        /// <param name="abs">Requests absolute encoding; remembered in <c>_absolute</c> until the
        /// wrapped writer has encoded a term.</param>
        public override void EncodeTerm(long[] empty, DataOutput output, FieldInfo fieldInfo, BlockTermState state,
                                        bool abs)
        {
            var pulsingState = (PulsingTermState)state;

            Debug.Assert(empty.Length == 0);

            // Accumulate the request: once set, the flag stays set until the
            // wrapped writer actually consumes it below.
            _absolute |= abs;

            var inlinedBytes = pulsingState.bytes;
            if (inlinedBytes != null)
            {
                // Term state carries its own bytes: emit them length-prefixed.
                output.WriteVInt32(inlinedBytes.Length);
                output.WriteBytes(inlinedBytes, 0, inlinedBytes.Length);

                // Redundant with the accumulation above (abs is already merged in);
                // kept so the flag is updated at the same points as before.
                _absolute |= abs;
                return;
            }

            // No inlined bytes: let the wrapped writer encode, then copy its output.
            _wrappedPostingsWriter.EncodeTerm(_longs, _buffer, fieldInfo, pulsingState.wrappedState, _absolute);

            for (var slot = 0; slot < _longsSize; slot++)
            {
                output.WriteVInt64(_longs[slot]);
            }

            _buffer.WriteTo(output);
            _buffer.Reset();

            // The pending absolute request has been consumed by the wrapped writer.
            _absolute = false;
        }
Пример #2
0
            /// <summary>
            /// Writes all pending terms to the output as one block, then clears the
            /// pending count.
            /// <para/>
            /// Layout written: VInt32 term count, VInt32 common prefix length, then three
            /// blobs (term suffixes, term frequencies, postings metadata), each preceded
            /// by its byte length as a VInt32. Afterwards the last pending term is copied
            /// into <c>lastPrevTerm</c>.
            /// </summary>
            private void FlushBlock()
            {
                // Pass 1: the prefix shared by every term in the block, each measured
                // against the term that preceded the first term of this block.
                int commonPrefix = SharedPrefix(lastPrevTerm, pendingTerms[0].Term);

                for (int i = 1; i < pendingCount; i++)
                {
                    int shared = SharedPrefix(lastPrevTerm, pendingTerms[i].Term);
                    if (shared < commonPrefix)
                    {
                        commonPrefix = shared;
                    }
                }

                var output = outerInstance.m_output;

                output.WriteVInt32(pendingCount);
                output.WriteVInt32(commonPrefix);

                // Pass 2: term suffixes, gathered into a separate byte[] blob.
                for (int i = 0; i < pendingCount; i++)
                {
                    var term = pendingTerms[i].Term;
                    int suffixLength = term.Length - commonPrefix;

                    // TODO: cutover to better intblock codec, instead
                    // of interleaving here:
                    bytesWriter.WriteVInt32(suffixLength);
                    bytesWriter.WriteBytes(term.Bytes, commonPrefix, suffixLength);
                }

                // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                output.WriteVInt32((int)bytesWriter.Position);
                bytesWriter.WriteTo(output);
                bytesWriter.Reset();

                // Pass 3: frequencies, as a byte[] blob.
                // TODO: cutover to better intblock codec.  simple64?
                for (int i = 0; i < pendingCount; i++)
                {
                    BlockTermState state = pendingTerms[i].State;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(state != null);
                    }

                    bytesWriter.WriteVInt32(state.DocFreq);

                    if (fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY)
                    {
                        // Only the doc freq is written for DOCS_ONLY fields.
                        continue;
                    }

                    // Stored as the difference from the doc freq.
                    bytesWriter.WriteVInt64(state.TotalTermFreq - state.DocFreq);
                }

                // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                output.WriteVInt32((int)bytesWriter.Position);
                bytesWriter.WriteTo(output);
                bytesWriter.Reset();

                // Pass 4: postings metadata. Only the first term of the block is
                // encoded with the absolute flag set.
                long[] metadata = new long[longsSize];

                for (int i = 0; i < pendingCount; i++)
                {
                    BlockTermState state = pendingTerms[i].State;
                    bool absolute = i == 0;

                    postingsWriter.EncodeTerm(metadata, bufferWriter, fieldInfo, state, absolute);

                    for (int slot = 0; slot < longsSize; slot++)
                    {
                        bytesWriter.WriteVInt64(metadata[slot]);
                    }

                    bufferWriter.WriteTo(bytesWriter);
                    bufferWriter.Reset();
                }

                // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                output.WriteVInt32((int)bytesWriter.Position);
                bytesWriter.WriteTo(output);
                bytesWriter.Reset();

                // Remember the final term so the next block can prefix-compare against it.
                lastPrevTerm.CopyBytes(pendingTerms[pendingCount - 1].Term);
                pendingCount = 0;
            }
Пример #3
0
            /// <summary>
            /// Emits the currently pending terms as a single block and resets the
            /// pending count to zero.
            /// <para/>
            /// The block starts with the term count and the common prefix length (both
            /// VInt), followed by three length-prefixed blobs: term suffixes, term
            /// frequencies, and postings metadata. The last pending term is then copied
            /// into <c>_lastPrevTerm</c>.
            /// </summary>
            private void FlushBlock()
            {
                // Step 1: find the prefix length common to all terms in the block,
                // each compared against the term before the block's first term.
                int prefixLength = SharedPrefix(_lastPrevTerm, _pendingTerms[0].Term);

                for (int n = 1; n < _pendingCount; n++)
                {
                    int candidate = SharedPrefix(_lastPrevTerm, _pendingTerms[n].Term);
                    if (candidate < prefixLength)
                    {
                        prefixLength = candidate;
                    }
                }

                var blockOutput = _btw._output;

                blockOutput.WriteVInt(_pendingCount);
                blockOutput.WriteVInt(prefixLength);

                // Step 2: suffixes go into their own byte[] blob.
                for (int n = 0; n < _pendingCount; n++)
                {
                    var pendingTerm = _pendingTerms[n].Term;
                    int suffixLength = pendingTerm.Length - prefixLength;

                    // TODO: cutover to better intblock codec, instead
                    // of interleaving here:
                    _bytesWriter.WriteVInt(suffixLength);
                    _bytesWriter.WriteBytes(pendingTerm.Bytes, prefixLength, suffixLength);
                }

                blockOutput.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(blockOutput);
                _bytesWriter.Reset();

                // Step 3: frequencies go into a byte[] blob.
                // TODO: cutover to better intblock codec.  simple64?
                for (int n = 0; n < _pendingCount; n++)
                {
                    BlockTermState termState = _pendingTerms[n].State;

                    Debug.Assert(termState != null);

                    _bytesWriter.WriteVInt(termState.DocFreq);

                    if (_fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                    {
                        // DOCS_ONLY fields get the doc freq and nothing else.
                        continue;
                    }

                    // Written as the difference from the doc freq.
                    _bytesWriter.WriteVLong(termState.TotalTermFreq - termState.DocFreq);
                }

                blockOutput.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(blockOutput);
                _bytesWriter.Reset();

                // Step 4: postings metadata; the absolute flag is set for the
                // block's first term only.
                long[] metadataLongs = new long[_longsSize];

                for (int n = 0; n < _pendingCount; n++)
                {
                    BlockTermState termState = _pendingTerms[n].State;
                    bool isFirstInBlock = n == 0;

                    _postingsWriter.EncodeTerm(metadataLongs, _bufferWriter, _fieldInfo, termState, isFirstInBlock);

                    for (int k = 0; k < _longsSize; k++)
                    {
                        _bytesWriter.WriteVLong(metadataLongs[k]);
                    }

                    _bufferWriter.WriteTo(_bytesWriter);
                    _bufferWriter.Reset();
                }

                blockOutput.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(blockOutput);
                _bytesWriter.Reset();

                // Keep the final term around as the reference for the next block.
                _lastPrevTerm.CopyBytes(_pendingTerms[_pendingCount - 1].Term);
                _pendingCount = 0;
            }