Пример #1
0
            /// <summary>
            /// Finishes the current term: asks the postings writer to encode the term's
            /// metadata and adds the term, paired with that metadata, to the FST builder.
            /// </summary>
            /// <param name="text">Bytes of the term being finished.</param>
            /// <param name="stats">Source of the document frequency and total term frequency for the term.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                var postings  = _outerInstance._postingsWriter;
                var termState = postings.NewTermState();

                // The same statistics are stored both in the term state handed to the
                // postings writer and in the metadata record kept in the FST.
                var termData = new FSTTermOutputs.TermData();
                termData.longs         = new long[_longsSize];
                termData.bytes         = null;
                termData.docFreq       = termState.DocFreq = stats.DocFreq;
                termData.totalTermFreq = termState.TotalTermFreq = stats.TotalTermFreq;

                postings.FinishTerm(termState);
                postings.EncodeTerm(termData.longs, _metaWriter, _fieldInfo, termState, true);

                // Whatever EncodeTerm left in the scratch writer becomes the byte part
                // of the metadata; it stays null when nothing was written.
                var encodedLength = (int)_metaWriter.GetFilePointer();
                if (encodedLength > 0)
                {
                    var encoded = new byte[encodedLength];
                    _metaWriter.WriteTo(encoded, 0);
                    _metaWriter.Reset();
                    termData.bytes = encoded;
                }

                _builder.Add(Util.ToInt32sRef(text, _scratchTerm), termData);
                _numTerms++;
            }
Пример #2
0
            /// <summary>
            /// Finishes the current term: builds a byte sequence made of the term
            /// statistics followed by the buffered postings, and adds a copy of it to
            /// the builder under the term.
            /// </summary>
            /// <param name="text">Bytes of the term being finished.</param>
            /// <param name="stats">Document frequency and total term frequency of the term.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(postingsWriter.docCount == stats.DocFreq);
                    Debugging.Assert(buffer2.GetFilePointer() == 0);
                }

                // Statistics header: docFreq, then (unless the field is docs-only)
                // totalTermFreq stored as its difference from docFreq.
                buffer2.WriteVInt32(stats.DocFreq);
                if (field.IndexOptions != IndexOptions.DOCS_ONLY)
                {
                    buffer2.WriteVInt64(stats.TotalTermFreq - stats.DocFreq);
                }
                int headerLength = (int)buffer2.GetFilePointer();

                buffer2.WriteTo(finalBuffer, 0);
                buffer2.Reset();

                // The postings bytes go right after the header; enlarge the shared
                // buffer first when header + postings do not fit.
                var postingsBytes = postingsWriter.buffer;
                int termLength    = headerLength + (int)postingsBytes.GetFilePointer();
                if (finalBuffer.Length < termLength)
                {
                    finalBuffer = ArrayUtil.Grow(finalBuffer, termLength);
                }
                postingsBytes.WriteTo(finalBuffer, headerLength);
                postingsBytes.Reset();

                // spare only points at the reusable buffer, so the builder gets a deep copy.
                spare.Bytes  = finalBuffer;
                spare.Length = termLength;

                builder.Add(Util.ToInt32sRef(text, scratchIntsRef), BytesRef.DeepCopyOf(spare));
                termCount++;
            }
Пример #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void finishTerm(util.BytesRef text, codecs.TermStats stats) throws java.io.IOException
            /// <summary>
            /// Finishes the current term: builds a byte sequence made of the term
            /// statistics followed by the buffered postings, and adds a copy of it to
            /// the builder under the term.
            /// </summary>
            /// <param name="text">Bytes of the term being finished.</param>
            /// <param name="stats">Document frequency and total term frequency of the term.</param>
            public override void finishTerm(BytesRef text, TermStats stats)
            {
                Debug.Assert(postingsWriter.docCount == stats.docFreq);
                Debug.Assert(buffer2.FilePointer == 0);

                // Statistics header: docFreq, then (unless the field is docs-only)
                // totalTermFreq stored as its difference from docFreq.
                buffer2.WriteVInt(stats.docFreq);
                if (field.IndexOptions != IndexOptions.DOCS_ONLY)
                {
                    buffer2.WriteVLong(stats.totalTermFreq - stats.docFreq);
                }
                int headerLength = (int)buffer2.FilePointer;

                buffer2.WriteTo(finalBuffer, 0);
                buffer2.reset();

                // The postings bytes go right after the header; enlarge the shared
                // buffer first when header + postings do not fit.
                var postingsBytes = postingsWriter.buffer;
                int termLength    = headerLength + (int)postingsBytes.FilePointer;
                if (finalBuffer.Length < termLength)
                {
                    finalBuffer = ArrayUtil.grow(finalBuffer, termLength);
                }
                postingsBytes.WriteTo(finalBuffer, headerLength);
                postingsBytes.reset();

                // spare only points at the reusable buffer, so the builder gets a deep copy.
                spare.bytes  = finalBuffer;
                spare.length = termLength;

                builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
                termCount++;
            }
        /// <summary>
        /// Expert: writes the value dictionary for a sorted/sortedset field.
        /// When every value has the same length the values are handed to
        /// <c>AddBinaryField</c>; otherwise they are written prefix-compressed,
        /// with the data offset of every <c>ADDRESS_INTERVAL</c>-th term recorded.
        /// </summary>
        /// <param name="field">Field whose number is written to the metadata header.</param>
        /// <param name="values">Dictionary values; the sequence is enumerated twice
        /// (once to measure lengths, once to write).</param>
        protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
        {
            // Pass 1: measure the shortest and longest value.
            int shortest = int.MaxValue;
            int longest  = int.MinValue;
            foreach (BytesRef value in values)
            {
                int length = value.Length;
                if (length < shortest)
                {
                    shortest = length;
                }
                if (length > longest)
                {
                    longest = length;
                }
            }

            if (shortest == longest)
            {
                // Fixed-length dictionary: no index needed, direct addressing by mult.
                AddBinaryField(field, values);
                return;
            }

            // Metadata header for a prefix-compressed binary entry.
            Meta.WriteVInt(field.Number);
            Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
            Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
            Meta.WriteLong(-1L);

            // Pass 2: write the bytes, sharing prefixes within a block. The address
            // of every ADDRESS_INTERVAL-th term is buffered in RAM until all terms
            // have been written.
            long dataStart = Data.FilePointer;
            RAMOutputStream addressBytes = new RAMOutputStream();
            MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(addressBytes, BLOCK_SIZE);
            BytesRef previous = new BytesRef();
            long written = 0;
            foreach (BytesRef value in values)
            {
                bool startsBlock = written % ADDRESS_INTERVAL == 0;
                if (startsBlock)
                {
                    addresses.Add(Data.FilePointer - dataStart);
                    // Force the first term in a block to be abs-encoded.
                    previous.Length = 0;
                }

                // Prefix-code against the previous term.
                int shared       = StringHelper.BytesDifference(previous, value);
                int suffixLength = value.Length - shared;
                Data.WriteVInt(shared);
                Data.WriteVInt(suffixLength);
                Data.WriteBytes(value.Bytes, value.Offset + shared, suffixLength);
                previous.CopyBytes(value);
                written++;
            }

            // Append the addresses of the indexed terms after the term data.
            long addressStart = Data.FilePointer;
            addresses.Finish();
            addressBytes.WriteTo(Data);

            // Metadata trailer.
            Meta.WriteVInt(shortest);
            Meta.WriteVInt(longest);
            Meta.WriteVLong(written);
            Meta.WriteLong(dataStart);
            Meta.WriteVInt(ADDRESS_INTERVAL);
            Meta.WriteLong(addressStart);
            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Meta.WriteVInt(BLOCK_SIZE);
        }
Пример #5
0
        /// <summary>
        /// Called when we are done adding docs to this term. Either hands the term
        /// over to the wrapped postings writer (when <c>_pendingCount</c> is -1) or
        /// serializes the buffered <c>_pending</c> entries into the term state's
        /// <c>bytes</c> so the postings are inlined into the terms dict.
        /// </summary>
        /// <param name="state">Term state being finished; it is cast to
        /// <c>PulsingTermState</c>, so any other runtime type fails the cast.</param>
        public override void FinishTerm(BlockTermState state)
        {
            var state2 = (PulsingTermState)state;

            // -1 selects the wrapped-writer path below; any other value must be positive.
            Debug.Assert(_pendingCount > 0 || _pendingCount == -1);

            if (_pendingCount == -1)
            {
                // Delegate: copy the statistics into the wrapped state, mark that no
                // inlined bytes exist, and let the wrapped writer finish the term.
                state2.wrappedState.DocFreq       = state2.DocFreq;
                state2.wrappedState.TotalTermFreq = state2.TotalTermFreq;
                state2.bytes = null;
                _wrappedPostingsWriter.FinishTerm(state2.wrappedState);
            }
            else
            {
                // There were few enough total occurrences for this
                // term, so we fully inline our postings data into
                // terms dict, now:

                // TODO: it'd be better to share this encoding logic
                // in some inner codec that knows how to write a
                // single doc / single position, etc.  This way if a
                // given codec wants to store other interesting
                // stuff, it could use this pulsing codec to do so

                // Positions (and possibly offsets) are indexed: _pending holds one
                // entry per position, grouped by document.
                if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
                {
                    var lastDocID         = 0;
                    var pendingIDX        = 0;
                    var lastPayloadLength = -1;
                    var lastOffsetLength  = -1;
                    while (pendingIDX < _pendingCount)
                    {
                        // First entry of the next document; it is read again (and
                        // pendingIDX advanced) by the position loop below.
                        var doc = _pending[pendingIDX];

                        var delta = doc.docID - lastDocID;
                        lastDocID = doc.docID;

                        // if (DEBUG) System.out.println("  write doc=" + doc.docID + " freq=" + doc.termFreq);

                        // Doc delta is shifted left by one; the low bit set means
                        // termFreq == 1, otherwise termFreq follows explicitly.
                        if (doc.termFreq == 1)
                        {
                            _buffer.WriteVInt32((delta << 1) | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt32(delta << 1);
                            _buffer.WriteVInt32(doc.termFreq);
                        }

                        // Position and offset deltas restart from 0 for every document.
                        var lastPos    = 0;
                        var lastOffset = 0;
                        for (var posIDX = 0; posIDX < doc.termFreq; posIDX++)
                        {
                            var pos = _pending[pendingIDX++];
                            Debug.Assert(pos.docID == doc.docID);
                            var posDelta = pos.pos - lastPos;
                            lastPos = pos.pos;

                            var payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                            if (_storePayloads)
                            {
                                // Low bit set means the payload length changed and the
                                // new length follows; otherwise the previous length is reused.
                                if (payloadLength != lastPayloadLength)
                                {
                                    _buffer.WriteVInt32((posDelta << 1) | 1);
                                    _buffer.WriteVInt32(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                {
                                    _buffer.WriteVInt32(posDelta << 1);
                                }
                            }
                            else
                            {
                                // No payloads stored: the position delta is written unshifted.
                                _buffer.WriteVInt32(posDelta);
                            }

                            if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
                            {
                                //System.out.println("write=" + pos.startOffset + "," + pos.endOffset);
                                // Start offset is delta-coded against the previous start
                                // offset; the low bit set means the offset length changed
                                // and the new length follows.
                                var offsetDelta  = pos.startOffset - lastOffset;
                                var offsetLength = pos.endOffset - pos.startOffset;
                                if (offsetLength != lastOffsetLength)
                                {
                                    _buffer.WriteVInt32(offsetDelta << 1 | 1);
                                    _buffer.WriteVInt32(offsetLength);
                                }
                                else
                                {
                                    _buffer.WriteVInt32(offsetDelta << 1);
                                }
                                lastOffset       = pos.startOffset;
                                lastOffsetLength = offsetLength;
                            }

                            if (payloadLength > 0)
                            {
                                Debug.Assert(_storePayloads);
                                // NOTE(review): copies from index 0 of the payload's array and
                                // ignores any payload offset; presumably pending payloads always
                                // start at index 0. Confirm where _pending is filled.
                                _buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                            }
                        }
                    }
                }
                else if (_indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    // One pending entry per document: doc delta with the same
                    // "low bit means termFreq == 1" encoding as above.
                    int lastDocId = 0;
                    for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
                    {
                        Position doc   = _pending[posIdx];
                        int      delta = doc.docID - lastDocId;

                        Debug.Assert(doc.termFreq != 0);

                        if (doc.termFreq == 1)
                        {
                            _buffer.WriteVInt32((delta << 1) | 1);
                        }
                        else
                        {
                            _buffer.WriteVInt32(delta << 1);
                            _buffer.WriteVInt32(doc.termFreq);
                        }
                        lastDocId = doc.docID;
                    }
                }
                else if (_indexOptions == IndexOptions.DOCS_ONLY)
                {
                    // Only document ids: plain doc deltas, no frequency flag.
                    int lastDocId = 0;
                    for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
                    {
                        Position doc = _pending[posIdx];
                        _buffer.WriteVInt32(doc.docID - lastDocId);
                        lastDocId = doc.docID;
                    }
                }

                // Move everything encoded above into the term state and clear the
                // scratch buffer for the next term.
                state2.bytes = new byte[(int)_buffer.GetFilePointer()];
                _buffer.WriteTo(state2.bytes, 0);
                _buffer.Reset();
            }
            // Reset the pending counter in both paths.
            _pendingCount = 0;
        }
Пример #6
0
        /// <summary>
        /// Expert: writes the value dictionary for a sorted/sortedset field.
        /// Values that all share one length are passed on to <c>AddBinaryField</c>;
        /// otherwise each value is written as (shared prefix length, suffix length,
        /// suffix bytes) and the data offset of every <c>ADDRESS_INTERVAL</c>-th
        /// term is stored after the term data.
        /// </summary>
        /// <param name="field">Field whose number is written to the metadata header.</param>
        /// <param name="values">Dictionary values; enumerated twice (measure, then write).</param>
        protected internal virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
        {
            // First check whether this is a "fixed-length" terms dict.
            int minLen = int.MaxValue;
            int maxLen = int.MinValue;
            foreach (BytesRef term in values)
            {
                int len = term.Length;
                if (len < minLen)
                {
                    minLen = len;
                }
                if (len > maxLen)
                {
                    maxLen = len;
                }
            }

            if (minLen == maxLen)
            {
                // No index needed: direct addressing by mult.
                AddBinaryField(field, values);
                return;
            }

            // Header.
            Meta.WriteVInt(field.Number);
            Meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
            Meta.WriteVInt(BINARY_PREFIX_COMPRESSED);
            Meta.WriteLong(-1L);

            // Term bytes, sharing prefixes within a block. Addresses of the indexed
            // terms are collected in a RAM buffer and appended once all terms are out.
            long termsStart = Data.FilePointer;
            RAMOutputStream indexBuffer = new RAMOutputStream();
            MonotonicBlockPackedWriter indexWriter = new MonotonicBlockPackedWriter(indexBuffer, BLOCK_SIZE);
            BytesRef priorTerm = new BytesRef();
            long termTotal = 0;
            foreach (BytesRef term in values)
            {
                if (termTotal % ADDRESS_INTERVAL == 0)
                {
                    indexWriter.Add(Data.FilePointer - termsStart);
                    // Force the first term in a block to be abs-encoded.
                    priorTerm.Length = 0;
                }

                // Prefix-code against the prior term.
                int prefixLen = StringHelper.BytesDifference(priorTerm, term);
                int suffixLen = term.Length - prefixLen;
                Data.WriteVInt(prefixLen);
                Data.WriteVInt(suffixLen);
                Data.WriteBytes(term.Bytes, term.Offset + prefixLen, suffixLen);
                priorTerm.CopyBytes(term);
                termTotal++;
            }

            // Write addresses of indexed terms.
            long indexStart = Data.FilePointer;
            indexWriter.Finish();
            indexBuffer.WriteTo(Data);

            // Trailer describing what was just written.
            Meta.WriteVInt(minLen);
            Meta.WriteVInt(maxLen);
            Meta.WriteVLong(termTotal);
            Meta.WriteLong(termsStart);
            Meta.WriteVInt(ADDRESS_INTERVAL);
            Meta.WriteLong(indexStart);
            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Meta.WriteVInt(BLOCK_SIZE);
        }
Пример #7
0
            /// <summary>
            /// Writes the pending terms as one block: term count and common prefix
            /// length, followed by three length-prefixed byte blobs (term suffixes,
            /// term statistics, postings metadata). Afterwards the last written term
            /// is remembered and the pending counter is reset.
            /// </summary>
            private void FlushBlock()
            {
                var output = outerInstance.m_output;

                // Pass 1: the common prefix of the block is the smallest prefix any
                // pending term shares with the term preceding the block.
                int commonPrefix = SharedPrefix(lastPrevTerm, pendingTerms[0].Term);
                for (int i = 1; i < pendingCount; i++)
                {
                    int shared = SharedPrefix(lastPrevTerm, pendingTerms[i].Term);
                    if (shared < commonPrefix)
                    {
                        commonPrefix = shared;
                    }
                }

                output.WriteVInt32(pendingCount);
                output.WriteVInt32(commonPrefix);

                // Pass 2: suffixes, as a separate byte[] blob.
                // TODO: cutover to better intblock codec, instead of interleaving
                // the suffix lengths with the suffix bytes here.
                for (int i = 0; i < pendingCount; i++)
                {
                    BytesRef term = pendingTerms[i].Term;
                    int suffixLength = term.Length - commonPrefix;
                    bytesWriter.WriteVInt32(suffixLength);
                    bytesWriter.WriteBytes(term.Bytes, commonPrefix, suffixLength);
                }
                output.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                bytesWriter.WriteTo(output);
                bytesWriter.Reset();

                // Pass 3: statistics, as a byte[] blob. docFreq is always written;
                // totalTermFreq (as its difference from docFreq) only when the field
                // is not docs-only.
                // TODO: cutover to better intblock codec.  simple64?
                for (int i = 0; i < pendingCount; i++)
                {
                    BlockTermState termState = pendingTerms[i].State;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(termState != null);
                    }
                    bytesWriter.WriteVInt32(termState.DocFreq);
                    if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
                    {
                        bytesWriter.WriteVInt64(termState.TotalTermFreq - termState.DocFreq);
                    }
                }
                output.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                bytesWriter.WriteTo(output);
                bytesWriter.Reset();

                // Pass 4: postings metadata. Only the first term of the block is
                // encoded with absolute == true.
                long[] metaLongs = new long[longsSize];
                for (int i = 0; i < pendingCount; i++)
                {
                    BlockTermState termState = pendingTerms[i].State;
                    postingsWriter.EncodeTerm(metaLongs, bufferWriter, fieldInfo, termState, i == 0);
                    for (int slot = 0; slot < longsSize; slot++)
                    {
                        bytesWriter.WriteVInt64(metaLongs[slot]);
                    }
                    bufferWriter.WriteTo(bytesWriter);
                    bufferWriter.Reset();
                }
                output.WriteVInt32((int)bytesWriter.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                bytesWriter.WriteTo(output);
                bytesWriter.Reset();

                // The last term of this block is the reference for the next block's prefix.
                lastPrevTerm.CopyBytes(pendingTerms[pendingCount - 1].Term);
                pendingCount = 0;
            }
            /// <summary>
            /// Reads every term of <paramref name="termsIn"/> together with its postings
            /// and stores them in arrays held by this instance: concatenated term bytes
            /// with per-term offsets, one <c>TermAndSkip</c> entry per term, and the
            /// per-term skip data packed into a single array.
            /// </summary>
            /// <param name="state">Segment read state; only used to look up the field's <c>FieldInfo</c>.</param>
            /// <param name="field">Name of the field being loaded.</param>
            /// <param name="termsIn">Source terms; must report its size.</param>
            /// <param name="minSkipCount">Stored in <c>this.minSkipCount</c>; not otherwise used in this constructor.</param>
            /// <param name="lowFreqCutoff">Terms whose docFreq is at most this value are stored as
            /// <c>LowFreqTerm</c> (one packed int[]); all others as <c>HighFreqTerm</c>.</param>
            /// <exception cref="System.ArgumentException">The size reported by <paramref name="termsIn"/> is -1.</exception>
            public DirectField(SegmentReadState state, string field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
            {
                FieldInfo fieldInfo = state.FieldInfos.FieldInfo(field);

                sumTotalTermFreq = termsIn.SumTotalTermFreq;
                sumDocFreq = termsIn.SumDocFreq;
                docCount = termsIn.DocCount;

                // The term count sizes the arrays below, so it has to be known up front.
                int numTerms = (int) termsIn.Size();
                if (numTerms == -1)
                {
                    throw new System.ArgumentException("codec does not provide Terms.size()");
                }
                terms = new TermAndSkip[numTerms];
                // One extra slot for the end sentinel written after the loop.
                termOffsets = new int[1 + numTerms];

                // Growable scratch for the concatenated term bytes; trimmed into
                // this.termBytes once all terms have been read.
                byte[] termBytes = new byte[1024];

                this.minSkipCount = minSkipCount;

                // Each flag is true when the field's index options rank above the compared level.
                hasFreq = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_ONLY) > 0;
                hasPos = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_AND_FREQS) > 0;
                hasOffsets_Renamed = fieldInfo.FieldIndexOptions.Value.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
                hasPayloads_Renamed = fieldInfo.HasPayloads();

                BytesRef term;
                // The enums are passed back into Docs/DocsAndPositions on every term.
                DocsEnum docsEnum = null;
                DocsAndPositionsEnum docsAndPositionsEnum = null;
                TermsEnum termsEnum = termsIn.Iterator(null);
                int termOffset = 0;

                IntArrayWriter scratch = new IntArrayWriter();

                // Used for payloads, if any:
                RAMOutputStream ros = new RAMOutputStream();

                // if (DEBUG) {
                //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
                // }

                // NOTE: count (a member not declared in this constructor) is the index
                // of the term currently being loaded.
                while ((term = termsEnum.Next()) != null)
                {
                    int docFreq = termsEnum.DocFreq();
                    long totalTermFreq = termsEnum.TotalTermFreq();

                    // if (DEBUG) {
                    //   System.out.println("  term=" + term.utf8ToString());
                    // }

                    termOffsets[count] = termOffset;

                    // Append this term's bytes to the scratch array, growing it if needed.
                    if (termBytes.Length < (termOffset + term.Length))
                    {
                        termBytes = ArrayUtil.Grow(termBytes, termOffset + term.Length);
                    }
                    Array.Copy(term.Bytes, term.Offset, termBytes, termOffset, term.Length);
                    termOffset += term.Length;
                    termOffsets[count + 1] = termOffset;

                    if (hasPos)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                    }
                    else
                    {
                        docsEnum = termsEnum.Docs(null, docsEnum);
                    }

                    TermAndSkip ent;

                    // Single handle for doc iteration regardless of which enum was obtained.
                    DocsEnum docsEnum2;
                    docsEnum2 = hasPos ? docsAndPositionsEnum : docsEnum;

                    int docID;

                    if (docFreq <= lowFreqCutoff)
                    {

                        ros.Reset();

                        // Pack postings for low-freq terms into a single int[]:
                        // per doc: docID, then freq (if hasFreq), then per position:
                        // position, start/end offset (if offsets), payload length (if payloads).
                        while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            scratch.Add(docID);
                            if (hasFreq)
                            {
                                int freq = docsEnum2.Freq();
                                scratch.Add(freq);
                                if (hasPos)
                                {
                                    for (int pos = 0; pos < freq; pos++)
                                    {
                                        scratch.Add(docsAndPositionsEnum.NextPosition());
                                        if (hasOffsets_Renamed)
                                        {
                                            scratch.Add(docsAndPositionsEnum.StartOffset());
                                            scratch.Add(docsAndPositionsEnum.EndOffset());
                                        }
                                        if (hasPayloads_Renamed)
                                        {
                                            // Payload bytes are collected in ros; only the
                                            // length (0 when absent) goes into the int[].
                                            BytesRef payload = docsAndPositionsEnum.Payload;
                                            if (payload != null)
                                            {
                                                scratch.Add(payload.Length);
                                                ros.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                                            }
                                            else
                                            {
                                                scratch.Add(0);
                                            }
                                        }
                                    }
                                }
                            }
                        }


                        // All payload bytes of this term as one array, or null when the
                        // field has no payloads.
                        byte[] payloads;
                        if (hasPayloads_Renamed)
                        {
                            ros.Flush();
                            payloads = new byte[(int) ros.Length];
                            ros.WriteTo(payloads, 0);
                        }
                        else
                        {
                            payloads = null;
                        }

                        // NOTE(review): presumably Get() also clears scratch for the next
                        // term, since scratch is never reset here; confirm in IntArrayWriter.
                        int[] postings = scratch.Get();

                        // totalTermFreq is narrowed from long to int for low-freq terms.
                        ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
                    }
                    else
                    {
                        // Arrays that do not apply to the field's index options stay null.
                        var docs = new int[docFreq];
                        int[] freqs;
                        int[][] positions;
                        byte[][][] payloads;

                        if (hasFreq)
                        {
                            freqs = new int[docFreq];
                            if (hasPos)
                            {
                                positions = new int[docFreq][];
                                if (hasPayloads_Renamed)
                                {
                                    payloads = new byte[docFreq][][];
                                }
                                else
                                {
                                    payloads = null;
                                }
                            }
                            else
                            {
                                positions = null;
                                payloads = null;
                            }
                        }
                        else
                        {
                            freqs = null;
                            positions = null;
                            payloads = null;
                        }

                        // Use separate int[] for the postings for high-freq
                        // terms:
                        int upto = 0;
                        while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            docs[upto] = docID;
                            if (hasFreq)
                            {
                                int freq = docsEnum2.Freq();
                                freqs[upto] = freq;
                                if (hasPos)
                                {
                                    // Ints stored per position: 3 (position, start offset,
                                    // end offset) with offsets, otherwise 1.
                                    int mult;
                                    if (hasOffsets_Renamed)
                                    {
                                        mult = 3;
                                    }
                                    else
                                    {
                                        mult = 1;
                                    }
                                    if (hasPayloads_Renamed)
                                    {
                                        payloads[upto] = new byte[freq][];
                                    }
                                    positions[upto] = new int[mult*freq];
                                    int posUpto = 0;
                                    for (int pos = 0; pos < freq; pos++)
                                    {
                                        positions[upto][posUpto] = docsAndPositionsEnum.NextPosition();
                                        if (hasPayloads_Renamed)
                                        {
                                            // Copy the payload bytes into a new array; the slot
                                            // stays null when the position has no payload.
                                            BytesRef payload = docsAndPositionsEnum.Payload;
                                            if (payload != null)
                                            {
                                                var payloadBytes = new byte[payload.Length];
                                                Array.Copy(payload.Bytes, payload.Offset, payloadBytes, 0,
                                                    payload.Length);
                                                payloads[upto][pos] = payloadBytes;
                                            }
                                        }
                                        posUpto++;
                                        if (hasOffsets_Renamed)
                                        {
                                            positions[upto][posUpto++] = docsAndPositionsEnum.StartOffset();
                                            positions[upto][posUpto++] = docsAndPositionsEnum.EndOffset();
                                        }
                                    }
                                }
                            }

                            upto++;
                        }
                        // The enum must have produced exactly docFreq documents.
                        Debug.Assert(upto == docFreq);
                        ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
                    }

                    terms[count] = ent;
                    SetSkips(count, termBytes);
                    count++;
                }

                // End sentinel:
                termOffsets[count] = termOffset;

                FinishSkips();

                //System.out.println(skipCount + " skips: " + field);

                // Keep only the used part of the scratch term bytes.
                this.termBytes = new byte[termOffset];
                Array.Copy(termBytes, 0, this.termBytes, 0, termOffset);

                // Pack skips:
                // concatenate every term's skips into one array; skipOffsets[i] is where
                // term i starts in it, and each term's own skips reference is then cleared.
                this.skips = new int[skipCount];
                this.skipOffsets = new int[1 + numTerms];

                int skipOffset = 0;
                for (int i = 0; i < numTerms; i++)
                {
                    int[] termSkips = terms[i].skips;
                    skipOffsets[i] = skipOffset;
                    if (termSkips != null)
                    {
                        Array.Copy(termSkips, 0, skips, skipOffset, termSkips.Length);
                        skipOffset += termSkips.Length;
                        terms[i].skips = null;
                    }
                }
                // End sentinel for the skip offsets.
                this.skipOffsets[numTerms] = skipOffset;
                Debug.Assert(skipOffset == skipCount);
            }
Пример #9
0
 // Writes the contents of buffer into the fields stream
 // and adds a new entry for this document into the index
 // stream.  This assumes the buffer was already written
 // in the correct fields format.
 internal void FlushDocument(int numStoredFields, RAMOutputStream buffer)
 {
     // Index entry for this document: the current position of the
     // fields stream, captured before anything is appended to it.
     var documentStart = fieldsStream.FilePointer;
     indexStream.WriteLong(documentStart);

     // Stored-field count first, then the pre-encoded field bytes
     // copied verbatim from the caller's buffer.
     fieldsStream.WriteVInt(numStoredFields);
     buffer.WriteTo(fieldsStream);
 }
Пример #10
0
            /// <summary>
            /// Writes all pending terms to the terms output as a single block and
            /// clears the pending list. The block consists of the term count, the
            /// common prefix length, and three blobs (term suffixes, term stats,
            /// term metadata), each preceded by its byte length as a VInt.
            /// </summary>
            private void FlushBlock()
            {
                // Pass 1: find the shortest prefix that any pending term shares
                // with the last term written before this block.
                int sharedLen = SharedPrefix(_lastPrevTerm, _pendingTerms[0].Term);
                for (int idx = 1; idx < _pendingCount; idx++)
                {
                    int candidate = SharedPrefix(_lastPrevTerm, _pendingTerms[idx].Term);
                    sharedLen = Math.Min(sharedLen, candidate);
                }

                var output = _btw._output;
                output.WriteVInt(_pendingCount);
                output.WriteVInt(sharedLen);

                // Pass 2: suffix blob -- for every term, the suffix length followed
                // by the bytes that come after the common prefix.
                // TODO: cutover to better intblock codec, instead of interleaving
                // lengths and bytes here.
                for (int idx = 0; idx < _pendingCount; idx++)
                {
                    var term = _pendingTerms[idx].Term;
                    var suffixLen = term.Length - sharedLen;
                    _bytesWriter.WriteVInt(suffixLen);
                    _bytesWriter.WriteBytes(term.Bytes, sharedLen, suffixLen);
                }
                output.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(output);
                _bytesWriter.Reset();

                // Pass 3: stats blob -- docFreq for every term, plus
                // (totalTermFreq - docFreq) unless the field is indexed DOCS_ONLY.
                // TODO: cutover to better intblock codec.  simple64?
                bool hasFreqs = _fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
                for (int idx = 0; idx < _pendingCount; idx++)
                {
                    BlockTermState termState = _pendingTerms[idx].State;

                    Debug.Assert(termState != null);

                    _bytesWriter.WriteVInt(termState.DocFreq);
                    if (hasFreqs)
                    {
                        _bytesWriter.WriteVLong(termState.TotalTermFreq - termState.DocFreq);
                    }
                }
                output.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(output);
                _bytesWriter.Reset();

                // Pass 4: metadata blob -- the postings writer fills the longs
                // array and the scratch buffer for each term; both are appended
                // to the blob in that order.
                var metaLongs = new long[_longsSize];
                for (int idx = 0; idx < _pendingCount; idx++)
                {
                    BlockTermState termState = _pendingTerms[idx].State;

                    // The "absolute" flag is true only for the first term of the block.
                    _postingsWriter.EncodeTerm(metaLongs, _bufferWriter, _fieldInfo, termState, idx == 0);

                    foreach (long value in metaLongs)
                    {
                        _bytesWriter.WriteVLong(value);
                    }
                    _bufferWriter.WriteTo(_bytesWriter);
                    _bufferWriter.Reset();
                }
                output.WriteVInt((int)_bytesWriter.FilePointer);
                _bytesWriter.WriteTo(output);
                _bytesWriter.Reset();

                // Remember the final term of this block as the reference term for
                // the next block's prefix computation, then empty the pending list.
                _lastPrevTerm.CopyBytes(_pendingTerms[_pendingCount - 1].Term);
                _pendingCount = 0;
            }