/// <summary>Process postings from multiple segments, all positioned on the
/// same term, writing merged entries out to the freqOutput and proxOutput
/// streams.
/// </summary>
/// <param name="termsConsumer">consumer that receives the merged postings for the current term</param>
/// <param name="smis">array of segments positioned on the same term</param>
/// <param name="n">number of cells in the array actually occupied</param>
/// <returns>number of documents across all segments in which this term was found</returns>
/// <throws>CorruptIndexException if the index is corrupt</throws>
/// <throws>IOException if there is a low-level IO error</throws>
private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
{
	FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.text);
	int df = 0;
	for (int i = 0; i < n; i++)
	{
		SegmentMergeInfo smi = smis[i];
		TermPositions postings = smi.GetPositions();
		System.Diagnostics.Debug.Assert(postings != null);
		int base_Renamed = smi.base_Renamed;
		int[] docMap = smi.GetDocMap();
		postings.Seek(smi.termEnum);

		while (postings.Next())
		{
			df++;
			int doc = postings.Doc();
			if (docMap != null)
				doc = docMap[doc]; // map around deletions
			doc += base_Renamed; // convert to merged space

			int freq = postings.Freq();
			FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);

			if (!omitTermFreqAndPositions)
			{
				for (int j = 0; j < freq; j++)
				{
					int position = postings.NextPosition();
					int payloadLength = postings.GetPayloadLength();
					if (payloadLength > 0)
					{
						// Reuse one shared payload buffer across positions, growing it only when needed.
						if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
							payloadBuffer = new byte[payloadLength];
						postings.GetPayload(payloadBuffer, 0);
					}
					posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
				}
				posConsumer.Finish();
			}
		}
	}
	docConsumer.Finish();

	return df;
}
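
// Illustrative sketch, not part of the original class: the doc-ID arithmetic
// used in AppendPostings above, isolated into a hypothetical helper. A
// segment's docMap (when present) compacts doc IDs around deletions, and the
// segment's base then shifts them into the merged doc-ID space. For example,
// with docMap = {0, -1, 1} (doc 1 deleted) and a base of 100, segment-local
// doc 2 becomes merged doc 101.
private static int MapToMergedDoc(int doc, int[] docMap, int docBase)
{
	if (docMap != null)
		doc = docMap[doc]; // compact around deletions
	return doc + docBase; // shift into merged doc-ID space
}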
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
{
	int base_Renamed = 0;
	int readerCount = readers.Count;
	for (int i = 0; i < readerCount; i++)
	{
		IndexReader reader = (IndexReader) readers[i];
		TermEnum termEnum = reader.Terms();
		SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
		int[] docMap = smi.GetDocMap();
		if (docMap != null)
		{
			if (docMaps == null)
			{
				docMaps = new int[readerCount][];
				delCounts = new int[readerCount];
			}
			docMaps[i] = docMap;
			delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
		}

		base_Renamed += reader.NumDocs();

		System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount);

		if (smi.Next())
			queue.Add(smi); // initialize queue
		else
			smi.Close();
	}

	SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

	System.String currentField = null;
	FormatPostingsTermsConsumer termsConsumer = null;

	while (queue.Size() > 0)
	{
		int matchSize = 0; // pop matching terms
		match[matchSize++] = (SegmentMergeInfo) queue.Pop();
		Term term = match[0].term;
		SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();

		while (top != null && term.CompareTo(top.term) == 0)
		{
			match[matchSize++] = (SegmentMergeInfo) queue.Pop();
			top = (SegmentMergeInfo) queue.Top();
		}

		if ((System.Object) currentField != (System.Object) term.field)
		{
			currentField = term.field;
			if (termsConsumer != null)
				termsConsumer.Finish();
			FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
			termsConsumer = consumer.AddField(fieldInfo);
			omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
		}

		int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo

		checkAbort.Work(df / 3.0);

		while (matchSize > 0)
		{
			SegmentMergeInfo smi = match[--matchSize];
			if (smi.Next())
				queue.Add(smi); // restore queue
			else
				smi.Close(); // done with a segment
		}
	}
}
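
// Illustrative sketch, not original code: the queue-driven multi-way merge in
// MergeTermInfos, reduced to merging already-sorted string arrays. Every
// input positioned on the smallest current value is processed together (the
// analogue of the "match" array of SegmentMergeInfos on the same term), then
// advanced, until all inputs are exhausted. All names here are hypothetical.
private static System.Collections.Generic.List<string> MergeSortedArrays(string[][] inputs)
{
	int n = inputs.Length;
	int[] pos = new int[n]; // per-input cursor, like each segment's TermEnum
	System.Collections.Generic.List<string> merged = new System.Collections.Generic.List<string>();

	while (true)
	{
		// Find the smallest current value across all non-exhausted inputs
		// (the analogue of queue.Top() in MergeTermInfos).
		string smallest = null;
		for (int i = 0; i < n; i++)
			if (pos[i] < inputs[i].Length &&
				(smallest == null || string.CompareOrdinal(inputs[i][pos[i]], smallest) < 0))
				smallest = inputs[i][pos[i]];
		if (smallest == null)
			break; // every input exhausted

		merged.Add(smallest); // one merged entry per distinct value

		// Advance every input positioned on the matching value, mirroring how
		// all SegmentMergeInfos on the same term are popped and processed together.
		for (int i = 0; i < n; i++)
			if (pos[i] < inputs[i].Length && inputs[i][pos[i]] == smallest)
				pos[i]++;
	}
	return merged;
}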