Example #1
        public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
        {
            queue = new SegmentMergeQueue(readers.Length);
            for (int i = 0; i < readers.Length; i++)
            {
                IndexReader reader = readers[i];
                TermEnum    termEnum;

                if (t != null)
                {
                    termEnum = reader.Terms(t);
                }
                else
                {
                    termEnum = reader.Terms();
                }

                SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
                // seed the queue: with no start term, advance to the first term;
                // otherwise Terms(t) already positioned the enum, so just check it is non-empty
                if (t == null ? smi.Next() : termEnum.Term() != null)
                {
                    queue.Put(smi);                      // initialize queue
                }
                else
                {
                    smi.Close();
                }
            }

            if (t != null && queue.Size() > 0)
            {
                Next();
            }
        }
Example #2
        public override bool Next()
        {
            SegmentMergeInfo top = (SegmentMergeInfo)queue.Top();

            if (top == null)
            {
                term = null;
                return(false);
            }

            term    = top.term;
            docFreq = 0;

            while (top != null && term.CompareTo(top.term) == 0)
            {
                queue.Pop();
                docFreq += top.termEnum.DocFreq();                 // increment freq
                if (top.Next())
                {
                    queue.Put(top);                      // restore queue
                }
                else
                {
                    top.Close();                         // done with a segment
                }
                top = (SegmentMergeInfo)queue.Top();
            }
            return(true);
        }
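
A note on the pattern: MultiTermEnum keeps one SegmentMergeInfo per segment in a priority queue ordered by the current term, and Next() drains every entry whose head equals the smallest term, summing DocFreq() before re-inserting the advanced enumerators. The sketch below shows the same k-way merge idea over plain sorted int arrays; KWayMergeSketch and its Merge method are illustrative stand-ins, not Lucene.NET types.

    // Minimal sketch of the k-way merge used by MultiTermEnum.Next(), on int[] segments.
    using System.Collections.Generic;

    static class KWayMergeSketch
    {
        // Yields each distinct value once with the number of segments containing it,
        // mirroring how Next() accumulates docFreq across segments sharing a term.
        public static IEnumerable<(int Value, int Count)> Merge(int[][] segments)
        {
            var queue = new SortedSet<(int Value, int Seg, int Pos)>();
            for (int s = 0; s < segments.Length; s++)
                if (segments[s].Length > 0)
                    queue.Add((segments[s][0], s, 0));                     // initialize queue

            while (queue.Count > 0)
            {
                int current = queue.Min.Value;
                int count = 0;
                while (queue.Count > 0 && queue.Min.Value == current)
                {
                    var (_, seg, pos) = queue.Min;                         // pop a matching segment
                    queue.Remove(queue.Min);
                    count++;
                    if (pos + 1 < segments[seg].Length)
                        queue.Add((segments[seg][pos + 1], seg, pos + 1)); // restore queue
                }
                yield return (current, count);
            }
        }
    }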
Example #3
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0;        // number of docs w/ term

            ResetSkip();
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                int   base_Renamed        = smi.base_Renamed;
                int[] docMap = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc];                   // map around deletions
                    }
                    doc += base_Renamed;                     // convert to merged space

                    if (doc < lastDoc)
                    {
                        throw new System.SystemException("docs out of order (" + doc + " < " + lastDoc + " )");
                    }

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        BufferSkip(lastDoc);
                    }

                    int docCode = (doc - lastDoc) << 1;                     // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1);                         // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode);                      // write doc
                        freqOutput.WriteVInt(freq);                         // write frequency in doc
                    }

                    int lastPosition = 0;                     // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        proxOutput.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }
            }
            return(df);
        }
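
The freq-stream encoding above packs the doc delta and the common freq == 1 case into one VInt: the delta is shifted left one bit and the low bit is set when the frequency is exactly one, so a second VInt is only needed for larger frequencies. The round-trip below illustrates that scheme with plain integers standing in for Lucene's VInt writes; EncodeDoc and DecodeDoc are illustrative names, not Lucene.NET APIs.

    using System;
    using System.Collections.Generic;

    static class DocCodeSketch
    {
        // Values that would be appended to the freq stream for one posting.
        public static IEnumerable<int> EncodeDoc(int doc, int lastDoc, int freq)
        {
            int docCode = (doc - lastDoc) << 1;   // use low bit to flag freq == 1
            if (freq == 1)
            {
                yield return docCode | 1;         // doc delta and freq in a single value
            }
            else
            {
                yield return docCode;             // doc delta
                yield return freq;                // frequency follows as its own value
            }
        }

        // Inverse: recover (doc, freq); readNext supplies the following value when needed.
        public static (int Doc, int Freq) DecodeDoc(int docCode, int lastDoc, Func<int> readNext)
        {
            int doc  = lastDoc + (docCode >> 1);
            int freq = (docCode & 1) != 0 ? 1 : readNext();
            return (doc, freq);
        }
    }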
Example #4
        private void  MergeTermInfos()
        {
            int base_Renamed = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader      reader   = (IndexReader)readers[i];
                TermEnum         termEnum = reader.Terms();
                SegmentMergeInfo smi      = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                base_Renamed += reader.NumDocs();
                if (smi.Next())
                {
                    queue.Put(smi);                      // initialize queue
                }
                else
                {
                    smi.Close();
                }
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                int matchSize = 0;                 // pop matching terms
                match[matchSize++] = (SegmentMergeInfo)queue.Pop();
                Term             term = match[0].term;
                SegmentMergeInfo top  = (SegmentMergeInfo)queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = (SegmentMergeInfo)queue.Pop();
                    top = (SegmentMergeInfo)queue.Top();
                }

                int df = MergeTermInfo(match, matchSize);                 // add new TermInfo

                if (checkAbort != null)
                {
                    checkAbort.Work(df / 3.0);
                }

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                    {
                        queue.Put(smi);                  // restore queue
                    }
                    else
                    {
                        smi.Close();                     // done with a segment
                    }
                }
            }
        }
Example #5
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
        {
            FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text);
            int df = 0;

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);

                while (postings.Next())
                {
                    df++;
                    int doc = postings.Doc;
                    if (docMap != null)
                    {
                        doc = docMap[doc]; // map around deletions
                    }
                    doc += base_Renamed;   // convert to merged space

                    int freq = postings.Freq;
                    FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);

                    if (!omitTermFreqAndPositions)
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            int position      = postings.NextPosition();
                            int payloadLength = postings.PayloadLength;
                            if (payloadLength > 0)
                            {
                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }
                                postings.GetPayload(payloadBuffer, 0);
                            }
                            posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
                        }
                        posConsumer.Finish();
                    }
                }
            }
            docConsumer.Finish();

            return(df);
        }
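
This newer variant no longer writes the freq/prox streams directly; it drives a consumer chain instead: AddTerm starts a term and yields a per-term docs consumer, AddDoc yields a per-doc positions consumer, and Finish closes each level once its children have been emitted. The interfaces below are simplified, hypothetical stand-ins that only capture the call order AppendPostings relies on; they are not the actual FormatPostings* abstract classes.

    // Hypothetical, trimmed-down consumer contract mirroring the nesting used above.
    interface ITermsConsumerSketch
    {
        IDocsConsumerSketch AddTerm(string termText);        // start a new term
        void Finish();                                       // after the last term of the field
    }

    interface IDocsConsumerSketch
    {
        IPositionsConsumerSketch AddDoc(int doc, int freq);  // one call per document, in doc order
        void Finish();                                       // after the last doc of the term
    }

    interface IPositionsConsumerSketch
    {
        void AddPosition(int position, byte[] payload, int offset, int length);
        void Finish();                                       // after the last position of the doc
    }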
Example #6
        public override bool LessThan(System.Object a, System.Object b)
        {
            SegmentMergeInfo stiA = (SegmentMergeInfo)a;
            SegmentMergeInfo stiB = (SegmentMergeInfo)b;
            int comparison        = stiA.term.CompareTo(stiB.term);

            if (comparison == 0)
            {
                return(stiA.base_Renamed < stiB.base_Renamed);
            }
            else
            {
                return(comparison < 0);
            }
        }
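
LessThan orders the merge queue primarily by term and breaks ties on the segment base, so when several segments hold the same term the one whose documents come first in the merged numbering is drained first and doc IDs stay ascending. The same ordering expressed as a standard IComparer is sketched below; the (Term, Base) pair is a simplified stand-in for SegmentMergeInfo.

    using System.Collections.Generic;

    // Sketch: the queue ordering as an IComparer over a simplified (term text, segment base) key.
    sealed class SegmentKeyComparer : IComparer<(string Term, int Base)>
    {
        public int Compare((string Term, int Base) a, (string Term, int Base) b)
        {
            int byTerm = string.CompareOrdinal(a.Term, b.Term);      // primary key: the term
            return byTerm != 0 ? byTerm : a.Base.CompareTo(b.Base);  // tie-break: segment start
        }
    }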
Example #7
        /// <summary>
        /// Process postings from multiple segments without tf, all positioned on the same term.
        /// Writes out merged entries only into freqOutput; the proxOutput stream is not written.
        /// </summary>
        /// <param name="smis">array of segments</param>
        /// <param name="n">number of cells in the array actually occupied</param>
        /// <returns>number of documents across all segments where this term was found</returns>
        private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0;      // number of docs w/ term

            skipListWriter.ResetSkip();
            int lastPayloadLength = -1;   // ensures that we write the first length

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc];                      // map around deletions
                    }
                    doc += base_Renamed;                        // convert to merged space

                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                    {
                        throw new CorruptIndexException("docs out of order (" + doc +
                                                        " <= " + lastDoc + " )");
                    }

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    int docCode = (doc - lastDoc);
                    lastDoc = doc;
                    freqOutput.WriteVInt(docCode);    // write doc delta only (tf omitted)
                }
            }
            return(df);
        }
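
With term frequencies omitted there is no low bit to reserve: each posting contributes exactly one VInt, the raw doc delta, and the skip entries never record payload lengths. Decoding is therefore a plain prefix sum over the deltas, as the small self-contained fragment below (illustrative values only) makes explicit.

    // Sketch: in the no-tf format the freq stream is just doc deltas, so decoding is a prefix sum.
    int lastDoc = 0;
    int[] deltas = { 3, 5, 1 };              // values read back from the freq stream
    foreach (int delta in deltas)
    {
        lastDoc += delta;                    // 3, 8, 9: the doc IDs in merged order
        System.Console.WriteLine(lastDoc);
    }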
Example #8
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0;        // number of docs w/ term

            skipListWriter.ResetSkip();
            bool storePayloads     = fieldInfos.FieldInfo(smis[0].term.field).storePayloads;
            int  lastPayloadLength = -1;             // ensures that we write the first length

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc];                   // map around deletions
                    }
                    doc += base_Renamed;                     // convert to merged space

                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                    {
                        throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )");
                    }

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    int docCode = (doc - lastDoc) << 1;                     // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1);                         // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode);                      // write doc
                        freqOutput.WriteVInt(freq);                         // write frequency in doc
                    }

                    /** See {@link DocumentWriter#writePostings(Posting[], String)} for
                     *  documentation about the encoding of positions and payloads
                     */
                    int lastPosition = 0;                     // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        int delta    = position - lastPosition;
                        if (storePayloads)
                        {
                            int payloadLength = postings.GetPayloadLength();
                            if (payloadLength == lastPayloadLength)
                            {
                                proxOutput.WriteVInt(delta * 2);
                            }
                            else
                            {
                                proxOutput.WriteVInt(delta * 2 + 1);
                                proxOutput.WriteVInt(payloadLength);
                                lastPayloadLength = payloadLength;
                            }
                            if (payloadLength > 0)
                            {
                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }
                                postings.GetPayload(payloadBuffer, 0);
                                proxOutput.WriteBytes(payloadBuffer, 0, payloadLength);
                            }
                        }
                        else
                        {
                            proxOutput.WriteVInt(delta);
                        }
                        lastPosition = position;
                    }
                }
            }
            return(df);
        }
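
When a field stores payloads, the position delta is doubled and the low bit signals whether a new payload length follows: delta * 2 means "same length as the previous position", while delta * 2 + 1 means a VInt payload length comes next, followed by the payload bytes themselves. A decoder sketch for one position entry is shown below; ReadPosition is an illustrative helper with delegates standing in for IndexInput reads, not a Lucene.NET method.

    using System;

    static class ProxDecodeSketch
    {
        // Decodes one position entry written by the payload-aware AppendPostings above.
        public static (int Position, int PayloadLength) ReadPosition(
            int lastPosition, int lastPayloadLength,
            Func<int> readVInt, Action<byte[], int, int> readBytes, byte[] buffer)
        {
            int code  = readVInt();
            int delta = code >> 1;                     // position delta was written as delta * 2 (+ 1)
            int payloadLength = (code & 1) != 0
                ? readVInt()                           // low bit set: a new payload length follows
                : lastPayloadLength;                   // low bit clear: reuse the previous length
            if (payloadLength > 0)
                readBytes(buffer, 0, payloadLength);   // payload bytes follow immediately
            return (lastPosition + delta, payloadLength);
        }
    }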
Example #9
        private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
        {
            int base_Renamed = 0;
            int readerCount  = readers.Count;

            for (int i = 0; i < readerCount; i++)
            {
                IndexReader      reader   = readers[i];
                TermEnum         termEnum = reader.Terms();
                SegmentMergeInfo smi      = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                int[]            docMap   = smi.GetDocMap();
                if (docMap != null)
                {
                    if (docMaps == null)
                    {
                        docMaps   = new int[readerCount][];
                        delCounts = new int[readerCount];
                    }
                    docMaps[i]   = docMap;
                    delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
                }

                base_Renamed += reader.NumDocs();

                System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);

                if (smi.Next())
                {
                    queue.Add(smi);                      // initialize queue
                }
                else
                {
                    smi.Dispose();
                }
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            System.String currentField = null;
            FormatPostingsTermsConsumer termsConsumer = null;

            while (queue.Size() > 0)
            {
                int matchSize = 0; // pop matching terms
                match[matchSize++] = queue.Pop();
                Term             term = match[0].term;
                SegmentMergeInfo top  = queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = queue.Pop();
                    top = queue.Top();
                }

                if ((System.Object)currentField != (System.Object)term.Field)
                {
                    currentField = term.Field;
                    if (termsConsumer != null)
                    {
                        termsConsumer.Finish();
                    }
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
                    termsConsumer            = consumer.AddField(fieldInfo);
                    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
                }

                int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo

                checkAbort.Work(df / 3.0);

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                    {
                        queue.Add(smi);                  // restore queue
                    }
                    else
                    {
                        smi.Dispose();                   // done with a segment
                    }
                }
            }
        }
        /// <summary>
        /// Process postings from multiple segments without tf, all positioned on the same term.
        /// Writes out merged entries only into freqOutput; the proxOutput stream is not written.
        /// </summary>
        /// <param name="smis">array of segments</param>
        /// <param name="n">number of cells in the array actually occupied</param>
        /// <returns>number of documents across all segments where this term was found</returns>
        private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df = 0;           // number of docs w/ term
            skipListWriter.ResetSkip();
            int lastPayloadLength = -1;   // ensures that we write the first length
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi = smis[i];
                TermPositions postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int base_Renamed = smi.base_Renamed;
                int[] docMap = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                        doc = docMap[doc];                      // map around deletions
                    doc += base_Renamed;                              // convert to merged space

                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                        throw new CorruptIndexException("docs out of order (" + doc +
                            " <= " + lastDoc + " )");

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    int docCode = (doc - lastDoc);
                    lastDoc = doc;
                    freqOutput.WriteVInt(docCode);    // write doc delta only (tf omitted)
                }
            }
            return df;
        }
        /// <summary>Merge one term found in one or more segments. The array <code>smis</code>
        /// contains segments that are positioned at the same term. <code>N</code>
        /// is the number of cells in the array actually occupied.
        /// 
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeTermInfo(SegmentMergeInfo[] smis, int n)
        {
            long freqPointer = freqOutput.GetFilePointer();
            long proxPointer;
            if (proxOutput != null)
                proxPointer = proxOutput.GetFilePointer();
            else
                proxPointer = 0;

            int df;
            if (fieldInfos.FieldInfo(smis[0].term.field).omitTf)
            {
                // append posting data
                df = AppendPostingsNoTf(smis, n);
            }
            else
            {
                df = AppendPostings(smis, n);
            }

            long skipPointer = skipListWriter.WriteSkip(freqOutput);

            if (df > 0)
            {
                // add an entry to the dictionary with pointers to prox and freq files
                termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
                termInfosWriter.Add(smis[0].term, termInfo);
            }

            return df;
        }
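
MergeTermInfo records where each merged term's postings begin: it snapshots the freq and prox file pointers before appending, then stores the document frequency, both pointers, and the skip-data offset (relative to the freq pointer) in the term dictionary. The illustrative record below just names the four values captured by termInfo.Set(...); TermDictionaryEntry is not a Lucene.NET type.

    // Illustrative shape of what termInfo.Set(...) captures for one merged term.
    sealed class TermDictionaryEntry
    {
        public int  DocFreq;      // df returned by AppendPostings / AppendPostingsNoTf
        public long FreqPointer;  // start of this term's entries in the freq stream
        public long ProxPointer;  // start of this term's entries in the prox stream (0 when tf is omitted)
        public int  SkipOffset;   // position of the skip data, stored relative to FreqPointer
    }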
Example #12
		public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
		{
			queue = new SegmentMergeQueue(readers.Length);
			for (int i = 0; i < readers.Length; i++)
			{
				IndexReader reader = readers[i];
				TermEnum termEnum;
				
				if (t != null)
				{
					termEnum = reader.Terms(t);
				}
				else
					termEnum = reader.Terms();
				
				SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
				if (t == null ? smi.Next() : termEnum.Term() != null)
					queue.Put(smi); // initialize queue
				else
					smi.Close();
			}
			
			if (t != null && queue.Size() > 0)
			{
				Next();
			}
		}
Example #13
 /// <summary>Optimized implementation. </summary>
 public virtual int Read(int[] docs, int[] freqs)
 {
     while (true)
     {
         while (current == null)
         {
             if (pointer < readers.Length)
             {
                 // try next segment
                 if (tenum != null)
                 {
                     smi = tenum.matchingSegments[matchingSegmentPos++];
                     if (smi == null)
                     {
                         pointer = readers.Length;
                         return 0;
                     }
                     pointer = smi.ord;
                 }
                 base_Renamed = starts[pointer];
                 current = TermDocs(pointer++);
             }
             else
             {
                 return 0;
             }
         }
         int end = current.Read(docs, freqs);
         if (end == 0)
         {
             // none left in segment
             current = null;
         }
         else
         {
             // got some
             int b = base_Renamed; // adjust doc numbers
             for (int i = 0; i < end; i++)
                 docs[i] += b;
             return end;
         }
     }
 }
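
Read() converts each segment-local doc ID into the merged numbering simply by adding the segment's start offset (base_Renamed, taken from starts[pointer]); the MergeTermInfos below builds the same offsets incrementally from reader.NumDocs(). The arithmetic is just an offset per segment, as this self-contained fragment (illustrative values) shows.

    // Sketch: mapping a segment-local doc ID into the merged doc-ID space.
    // starts[i] is the number of documents contributed by all earlier segments.
    int[] starts = { 0, 100, 250 };               // segment 0 has 100 docs, segment 1 has 150
    int segment  = 2;
    int localDoc = 7;                             // doc ID inside segment 2
    int mergedDoc = starts[segment] + localDoc;
    System.Console.WriteLine(mergedDoc);          // prints 257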
        private void MergeTermInfos()
        {
            int base_Renamed = 0;
            int readerCount = readers.Count;
            for (int i = 0; i < readerCount; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                TermEnum termEnum = reader.Terms();
                SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);

                int[] docMap = smi.GetDocMap();
                if (docMap != null)
                {
                    if (docMaps == null)
                    {
                        docMaps = new int[readerCount][];
                        delCounts = new int[readerCount];
                    }
                    docMaps[i] = docMap;
                    delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
                }

                base_Renamed += reader.NumDocs();
                if (smi.Next())
                    queue.Put(smi); // initialize queue
                else
                    smi.Close();
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                int matchSize = 0; // pop matching terms
                match[matchSize++] = (SegmentMergeInfo) queue.Pop();
                Term term = match[0].term;
                SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = (SegmentMergeInfo) queue.Pop();
                    top = (SegmentMergeInfo) queue.Top();
                }

                int df = MergeTermInfo(match, matchSize); // add new TermInfo

                if (checkAbort != null)
                    checkAbort.Work(df / 3.0);

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                        queue.Put(smi); // restore queue
                    else
                        smi.Close(); // done with a segment
                }
            }
        }
Example #15
 public virtual void  Seek(Term term)
 {
     this.term = term;
     this.base_Renamed = 0;
     this.pointer = 0;
     this.current = null;
     this.tenum = null;
     this.smi = null;
     this.matchingSegmentPos = 0;
 }
Example #16
 public virtual bool Next()
 {
     for (; ; )
     {
         if (current != null && current.Next())
         {
             return true;
         }
         else if (pointer < readers.Length)
         {
             if (tenum != null)
             {
                 smi = tenum.matchingSegments[matchingSegmentPos++];
                 if (smi == null)
                 {
                     pointer = readers.Length;
                     return false;
                 }
                 pointer = smi.ord;
             }
             base_Renamed = starts[pointer];
             current = TermDocs(pointer++);
         }
         else
         {
             return false;
         }
     }
 }
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        /// 
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df = 0; // number of docs w/ term
            skipListWriter.ResetSkip();
            bool storePayloads = fieldInfos.FieldInfo(smis[0].term.field).storePayloads;
                int lastPayloadLength = -1; // ensures that we write the first length
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi = smis[i];
                TermPositions postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int base_Renamed = smi.base_Renamed;
                int[] docMap = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                        doc = docMap[doc]; // map around deletions
                    doc += base_Renamed; // convert to merged space

                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                        throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )");

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode); // write doc
                        freqOutput.WriteVInt(freq); // write frequency in doc
                    }

                    /** See {@link DocumentWriter#writePostings(Posting[], String)} for
                    *  documentation about the encoding of positions and payloads
                    */
                    int lastPosition = 0; // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        int delta = position - lastPosition;
                        if (storePayloads)
                        {
                            int payloadLength = postings.GetPayloadLength();
                            if (payloadLength == lastPayloadLength)
                            {
                                proxOutput.WriteVInt(delta * 2);
                            }
                            else
                            {
                                proxOutput.WriteVInt(delta * 2 + 1);
                                proxOutput.WriteVInt(payloadLength);
                                lastPayloadLength = payloadLength;
                            }
                            if (payloadLength > 0)
                            {
                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }
                                postings.GetPayload(payloadBuffer, 0);
                                proxOutput.WriteBytes(payloadBuffer, 0, payloadLength);
                            }
                        }
                        else
                        {
                            proxOutput.WriteVInt(delta);
                        }
                        lastPosition = position;
                    }
                }
            }
            return df;
        }
Example #18
		private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
		{
			int base_Renamed = 0;
			int readerCount = readers.Count;
			for (int i = 0; i < readerCount; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				TermEnum termEnum = reader.Terms();
				SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
				int[] docMap = smi.GetDocMap();
				if (docMap != null)
				{
					if (docMaps == null)
					{
						docMaps = new int[readerCount][];
						delCounts = new int[readerCount];
					}
					docMaps[i] = docMap;
					delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
				}
				
				base_Renamed += reader.NumDocs();
				
				System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount);
				
				if (smi.Next())
					queue.Add(smi); // initialize queue
				else
					smi.Close();
			}
			
			SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
			
			System.String currentField = null;
			FormatPostingsTermsConsumer termsConsumer = null;
			
			while (queue.Size() > 0)
			{
				int matchSize = 0; // pop matching terms
				match[matchSize++] = (SegmentMergeInfo) queue.Pop();
				Term term = match[0].term;
				SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
				
				while (top != null && term.CompareTo(top.term) == 0)
				{
					match[matchSize++] = (SegmentMergeInfo) queue.Pop();
					top = (SegmentMergeInfo) queue.Top();
				}
				
				if ((System.Object) currentField != (System.Object) term.field)
				{
					currentField = term.field;
					if (termsConsumer != null)
						termsConsumer.Finish();
					FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
					termsConsumer = consumer.AddField(fieldInfo);
					omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
				}
				
				int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo
				
				checkAbort.Work(df / 3.0);
				
				while (matchSize > 0)
				{
					SegmentMergeInfo smi = match[--matchSize];
					if (smi.Next())
						queue.Add(smi); // restore queue
					else
						smi.Close(); // done with a segment
				}
			}
		}
Example #19
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        /// 
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df = 0; // number of docs w/ term
            ResetSkip();
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi = smis[i];
                TermPositions postings = smi.GetPositions();
                int base_Renamed = smi.base_Renamed;
                int[] docMap = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                        doc = docMap[doc]; // map around deletions
                    doc += base_Renamed; // convert to merged space

                    if (doc < lastDoc)
                        throw new System.SystemException("docs out of order (" + doc + " < " + lastDoc + " )");

                    df++;

                    if ((df % skipInterval) == 0)
                    {
                        BufferSkip(lastDoc);
                    }

                    int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode); // write doc
                        freqOutput.WriteVInt(freq); // write frequency in doc
                    }

                    int lastPosition = 0; // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        proxOutput.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }
            }
            return df;
        }
Example #20
        private void MergeTermInfos()
        {
            int base_Renamed = 0;
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                TermEnum termEnum = reader.Terms();
                SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                base_Renamed += reader.NumDocs();
                if (smi.Next())
                    queue.Put(smi); // initialize queue
                else
                    smi.Close();
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                int matchSize = 0; // pop matching terms
                match[matchSize++] = (SegmentMergeInfo) queue.Pop();
                Term term = match[0].term;
                SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = (SegmentMergeInfo) queue.Pop();
                    top = (SegmentMergeInfo) queue.Top();
                }

                MergeTermInfo(match, matchSize); // add new TermInfo

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                        queue.Put(smi); // restore queue
                    else
                        smi.Close(); // done with a segment
                }
            }
        }
Example #21
        /// <summary>Merge one term found in one or more segments. The array <code>smis</code>
        /// contains segments that are positioned at the same term. <code>N</code>
        /// is the number of cells in the array actually occupied.
        /// 
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        private void MergeTermInfo(SegmentMergeInfo[] smis, int n)
        {
            long freqPointer = freqOutput.GetFilePointer();
            long proxPointer = proxOutput.GetFilePointer();

            int df = AppendPostings(smis, n); // append posting data

            long skipPointer = WriteSkip();

            if (df > 0)
            {
                // add an entry to the dictionary with pointers to prox and freq files
                termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
                termInfosWriter.Add(smis[0].term, termInfo);
            }
        }
Example #22
		/// <summary>Process postings from multiple segments all positioned on the
		/// same term. Writes out merged entries into freqOutput and
		/// the proxOutput streams.
		/// 
		/// </summary>
		/// <param name="smis">array of segments
		/// </param>
		/// <param name="n">number of cells in the array actually occupied
		/// </param>
		/// <returns> number of documents across all segments where this term was found
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
		{
			
			FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.text);
			int df = 0;
			for (int i = 0; i < n; i++)
			{
				SegmentMergeInfo smi = smis[i];
				TermPositions postings = smi.GetPositions();
				System.Diagnostics.Debug.Assert(postings != null);
				int base_Renamed = smi.base_Renamed;
				int[] docMap = smi.GetDocMap();
				postings.Seek(smi.termEnum);
				
				while (postings.Next())
				{
					df++;
					int doc = postings.Doc();
					if (docMap != null)
						doc = docMap[doc]; // map around deletions
					doc += base_Renamed; // convert to merged space
					
					int freq = postings.Freq();
					FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);
					
					if (!omitTermFreqAndPositions)
					{
						for (int j = 0; j < freq; j++)
						{
							int position = postings.NextPosition();
							int payloadLength = postings.GetPayloadLength();
							if (payloadLength > 0)
							{
								if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
									payloadBuffer = new byte[payloadLength];
								postings.GetPayload(payloadBuffer, 0);
							}
							posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
						}
						posConsumer.Finish();
					}
				}
			}
			docConsumer.Finish();
			
			return df;
		}
Example #23
            internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
            
            public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t)
            {
                this.topReader = topReader;
                queue = new SegmentMergeQueue(readers.Length);
                matchingSegments = new SegmentMergeInfo[readers.Length + 1];
                for (int i = 0; i < readers.Length; i++)
                {
                    IndexReader reader = readers[i];

                    TermEnum termEnum = t != null ? reader.Terms(t) : reader.Terms();

                    var smi = new SegmentMergeInfo(starts[i], termEnum, reader) { ord = i };
                    if (t == null ? smi.Next() : termEnum.Term != null)
                        queue.Add(smi); // initialize queue
                    else
                        smi.Dispose();
                }
                
                if (t != null && queue.Size() > 0)
                {
                    Next();
                }
            }
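
This later revision also records, per term, which segments actually contain it in the null-terminated matchingSegments array (sized readers.Length + 1 so a terminating null slot is always available), letting MultiTermDocs visit only those segments instead of probing every reader. Consuming such an array just stops at the first null, as in this self-contained fragment with illustrative values.

    // Sketch: walking a null-terminated array, the convention matchingSegments follows.
    string[] matching = { "seg0", "seg3", null };     // last slot is the terminator
    for (int i = 0; matching[i] != null; i++)
        System.Console.WriteLine(matching[i]);        // visits only the matching segments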