コード例 #1
0
        /// <summary>
        /// Merges the postings of a single term across the first <paramref name="n"/>
        /// entries of <paramref name="smis"/>, all of which are expected to be
        /// positioned on that term. The term text is taken from the first entry.
        /// Every document number is remapped into the merged document space and
        /// passed, with its frequency, to the docs consumer returned by
        /// <paramref name="termsConsumer"/>; positions and payloads are forwarded
        /// as well unless <c>omitTermFreqAndPositions</c> is set.
        /// </summary>
        /// <param name="termsConsumer">consumer the term and its merged postings are added to</param>
        /// <param name="smis">array of segments</param>
        /// <param name="n">number of cells in the array actually occupied</param>
        /// <returns>number of documents across all segments where this term was found</returns>
        /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
        /// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
        private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
        {
            FormatPostingsDocsConsumer docsOut = termsConsumer.AddTerm(smis[0].term.text);
            int docFreq = 0;

            for (int seg = 0; seg < n; seg++)
            {
                SegmentMergeInfo segment   = smis[seg];
                TermPositions    positions = segment.GetPositions();
                System.Diagnostics.Debug.Assert(positions != null);
                int   docBase = segment.base_Renamed;
                int[] remap   = segment.GetDocMap();
                positions.Seek(segment.termEnum);

                while (positions.Next())
                {
                    docFreq++;

                    // A non-null map redirects document numbers around deletions;
                    // adding the segment base converts the result to merged space.
                    int segmentDoc = positions.Doc();
                    int mergedDoc  = (remap != null ? remap[segmentDoc] : segmentDoc) + docBase;

                    int termFreq = positions.Freq();
                    FormatPostingsPositionsConsumer positionsOut = docsOut.AddDoc(mergedDoc, termFreq);

                    if (omitTermFreqAndPositions)
                    {
                        // Nothing beyond the document entry is written for this field.
                        continue;
                    }

                    for (int remaining = termFreq; remaining > 0; remaining--)
                    {
                        int pos        = positions.NextPosition();
                        int payloadLen = positions.GetPayloadLength();
                        if (payloadLen > 0)
                        {
                            // Reuse the shared buffer; replace it only when it is too small.
                            bool tooSmall = payloadBuffer == null || payloadBuffer.Length < payloadLen;
                            if (tooSmall)
                            {
                                payloadBuffer = new byte[payloadLen];
                            }
                            positions.GetPayload(payloadBuffer, 0);
                        }
                        positionsOut.AddPosition(pos, payloadBuffer, 0, payloadLen);
                    }
                    positionsOut.Finish();
                }
            }

            docsOut.Finish();
            return docFreq;
        }
コード例 #2
0
ファイル: SegmentMerger.cs プロジェクト: carrie901/mono
		/// <summary>
		/// Merges the terms of every reader in <c>readers</c> into
		/// <paramref name="consumer"/>. A SegmentMergeInfo is created per reader and
		/// queued when its first <c>Next()</c> succeeds (closed otherwise). The main
		/// loop then pops the head of the queue together with every other entry whose
		/// term compares equal, appends their combined postings, and advances each
		/// popped entry, re-queueing it or closing it when it has no further term.
		/// As a side effect, <c>docMaps</c> and <c>delCounts</c> are filled in for
		/// readers that report a non-null doc map.
		/// </summary>
		/// <param name="consumer">receives an <c>AddField</c> call each time the field of the current term changes</param>
		private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
		{
			int docBase = 0;
			int readerCount = readers.Count;
			for (int slot = 0; slot < readerCount; slot++)
			{
				IndexReader reader = (IndexReader) readers[slot];
				SegmentMergeInfo info = new SegmentMergeInfo(docBase, reader.Terms(), reader);
				int[] remap = info.GetDocMap();
				if (remap != null)
				{
					// The per-reader arrays are allocated only once some reader has a doc map.
					if (docMaps == null)
					{
						docMaps = new int[readerCount][];
						delCounts = new int[readerCount];
					}
					docMaps[slot] = remap;
					delCounts[slot] = info.reader.MaxDoc() - info.reader.NumDocs();
				}
				
				docBase += reader.NumDocs();
				
				System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - info.delCount);
				
				// initialize queue
				if (!info.Next())
				{
					info.Close();
				}
				else
				{
					queue.Add(info);
				}
			}
			
			SegmentMergeInfo[] sameTerm = new SegmentMergeInfo[readers.Count];
			
			System.String activeField = null;
			FormatPostingsTermsConsumer termsConsumer = null;
			
			while (queue.Size() > 0)
			{
				// pop matching terms
				SegmentMergeInfo lead = (SegmentMergeInfo) queue.Pop();
				sameTerm[0] = lead;
				int sameTermCount = 1;
				Term term = lead.term;
				
				for (SegmentMergeInfo head = (SegmentMergeInfo) queue.Top();
					head != null && term.CompareTo(head.term) == 0;
					head = (SegmentMergeInfo) queue.Top())
				{
					sameTerm[sameTermCount++] = (SegmentMergeInfo) queue.Pop();
				}
				
				// Identity comparison, not string equality.
				// NOTE(review): presumably field names are interned - confirm.
				if (!System.Object.ReferenceEquals(activeField, term.field))
				{
					activeField = term.field;
					if (termsConsumer != null)
					{
						termsConsumer.Finish();
					}
					FieldInfo fieldInfo = fieldInfos.FieldInfo(activeField);
					termsConsumer = consumer.AddField(fieldInfo);
					omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
				}
				
				// add new TermInfo
				int docFreq = AppendPostings(termsConsumer, sameTerm, sameTermCount);
				
				checkAbort.Work(docFreq / 3.0);
				
				// restore queue, walking the popped entries from last to first
				for (int k = sameTermCount - 1; k >= 0; k--)
				{
					SegmentMergeInfo popped = sameTerm[k];
					if (!popped.Next())
					{
						popped.Close(); // done with a segment
					}
					else
					{
						queue.Add(popped);
					}
				}
			}
		}
コード例 #3
0
        /// <summary>
        /// Merges the terms of every reader in <c>readers</c> into
        /// <paramref name="consumer"/>. A SegmentMergeInfo is created per reader and
        /// queued when its first <c>Next()</c> succeeds (closed otherwise). The main
        /// loop then pops the head of the queue together with every other entry whose
        /// term compares equal, appends their combined postings, and advances each
        /// popped entry, re-queueing it or closing it when it has no further term.
        /// As a side effect, <c>docMaps</c> and <c>delCounts</c> are filled in for
        /// readers that report a non-null doc map.
        /// </summary>
        /// <param name="consumer">receives an <c>AddField</c> call each time the field of the current term changes</param>
        private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
        {
            int docBase     = 0;
            int readerCount = readers.Count;

            for (int slot = 0; slot < readerCount; slot++)
            {
                IndexReader      reader = (IndexReader)readers[slot];
                SegmentMergeInfo info   = new SegmentMergeInfo(docBase, reader.Terms(), reader);
                int[]            remap  = info.GetDocMap();
                if (remap != null)
                {
                    // The per-reader arrays are allocated only once some reader has a doc map.
                    if (docMaps == null)
                    {
                        docMaps   = new int[readerCount][];
                        delCounts = new int[readerCount];
                    }
                    docMaps[slot]   = remap;
                    delCounts[slot] = info.reader.MaxDoc() - info.reader.NumDocs();
                }

                docBase += reader.NumDocs();

                System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - info.delCount);

                // initialize queue
                if (info.Next())
                {
                    queue.Add(info);
                    continue;
                }
                info.Close();
            }

            SegmentMergeInfo[] sameTerm = new SegmentMergeInfo[readers.Count];

            System.String activeField = null;
            FormatPostingsTermsConsumer termsConsumer = null;

            while (queue.Size() > 0)
            {
                // pop matching terms
                SegmentMergeInfo lead = (SegmentMergeInfo)queue.Pop();
                sameTerm[0] = lead;
                int  sameTermCount = 1;
                Term term          = lead.term;

                for (SegmentMergeInfo head = (SegmentMergeInfo)queue.Top();
                     head != null && term.CompareTo(head.term) == 0;
                     head = (SegmentMergeInfo)queue.Top())
                {
                    sameTerm[sameTermCount++] = (SegmentMergeInfo)queue.Pop();
                }

                // Identity comparison, not string equality.
                // NOTE(review): presumably field names are interned - confirm.
                if (!System.Object.ReferenceEquals(activeField, term.field))
                {
                    activeField = term.field;
                    if (termsConsumer != null)
                    {
                        termsConsumer.Finish();
                    }
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(activeField);
                    termsConsumer            = consumer.AddField(fieldInfo);
                    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
                }

                // add new TermInfo
                int docFreq = AppendPostings(termsConsumer, sameTerm, sameTermCount);

                checkAbort.Work(docFreq / 3.0);

                // restore queue, walking the popped entries from last to first
                for (int k = sameTermCount - 1; k >= 0; k--)
                {
                    SegmentMergeInfo popped = sameTerm[k];
                    if (popped.Next())
                    {
                        queue.Add(popped);
                    }
                    else
                    {
                        popped.Close();                      // done with a segment
                    }
                }
            }
        }