예제 #1
0
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
        {
            FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text);
            int df = 0;

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);

                while (postings.Next())
                {
                    df++;
                    int doc = postings.Doc;
                    if (docMap != null)
                    {
                        doc = docMap[doc]; // map around deletions
                    }
                    doc += base_Renamed;   // convert to merged space

                    int freq = postings.Freq;
                    FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);

                    if (!omitTermFreqAndPositions)
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            int position      = postings.NextPosition();
                            int payloadLength = postings.PayloadLength;
                            if (payloadLength > 0)
                            {
                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }
                                postings.GetPayload(payloadBuffer, 0);
                            }
                            posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
                        }
                        posConsumer.Finish();
                    }
                }
            }
            docConsumer.Finish();

            return(df);
        }
예제 #2
0
        /* Walk through all unique text tokens (Posting
         * instances) found in this field and serialize them
         * into a single RAM segment. */
        internal void  AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer)
        {
            int numFields = fields.Length;

            FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];

            for (int i = 0; i < numFields; i++)
            {
                FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);

                System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields [0].fieldInfo);

                // Should always be true
                bool result = fms.NextTerm();
                System.Diagnostics.Debug.Assert(result);
            }

            FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo);

            FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];

            bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;

            while (numFields > 0)
            {
                // Get the next term to merge
                termStates[0] = mergeStates[0];
                int numToMerge = 1;

                for (int i = 1; i < numFields; i++)
                {
                    char[] text       = mergeStates[i].text;
                    int    textOffset = mergeStates[i].textOffset;
                    int    cmp        = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);

                    if (cmp < 0)
                    {
                        termStates[0] = mergeStates[i];
                        numToMerge    = 1;
                    }
                    else if (cmp == 0)
                    {
                        termStates[numToMerge++] = mergeStates[i];
                    }
                }

                FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset);

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                while (numToMerge > 0)
                {
                    FreqProxFieldMergeState minState = termStates[0];
                    for (int i = 1; i < numToMerge; i++)
                    {
                        if (termStates[i].docID < minState.docID)
                        {
                            minState = termStates[i];
                        }
                    }

                    int termDocFreq = minState.termFreq;

                    FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq);

                    ByteSliceReader prox = minState.prox;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.
                    if (!currentFieldOmitTermFreqAndPositions)
                    {
                        // omitTermFreqAndPositions == false so we do write positions &
                        // payload
                        int position = 0;
                        for (int j = 0; j < termDocFreq; j++)
                        {
                            int code = prox.ReadVInt();
                            position += (code >> 1);

                            int payloadLength;
                            if ((code & 1) != 0)
                            {
                                // This position has a payload
                                payloadLength = prox.ReadVInt();

                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }

                                prox.ReadBytes(payloadBuffer, 0, payloadLength);
                            }
                            else
                            {
                                payloadLength = 0;
                            }

                            posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
                        }                         //End for

                        posConsumer.Finish();
                    }

                    if (!minState.NextDoc())
                    {
                        // Remove from termStates
                        int upto = 0;
                        for (int i = 0; i < numToMerge; i++)
                        {
                            if (termStates[i] != minState)
                            {
                                termStates[upto++] = termStates[i];
                            }
                        }
                        numToMerge--;
                        System.Diagnostics.Debug.Assert(upto == numToMerge);

                        // Advance this state to the next term

                        if (!minState.NextTerm())
                        {
                            // OK, no more terms, so remove from mergeStates
                            // as well
                            upto = 0;
                            for (int i = 0; i < numFields; i++)
                            {
                                if (mergeStates[i] != minState)
                                {
                                    mergeStates[upto++] = mergeStates[i];
                                }
                            }
                            numFields--;
                            System.Diagnostics.Debug.Assert(upto == numFields);
                        }
                    }
                }

                docConsumer.Finish();
            }

            termsConsumer.Finish();
        }