/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n) { FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text); int df = 0; for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { df++; int doc = postings.Doc; if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space int freq = postings.Freq; FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq); if (!omitTermFreqAndPositions) { for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int payloadLength = postings.PayloadLength; if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) { payloadBuffer = new byte[payloadLength]; } postings.GetPayload(payloadBuffer, 0); } posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); } posConsumer.Finish(); } } } docConsumer.Finish(); return(df); }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n) { FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text); int df = 0; for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { df++; int doc = postings.Doc; if (docMap != null) doc = docMap[doc]; // map around deletions doc += base_Renamed; // convert to merged space int freq = postings.Freq; FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq); if (!omitTermFreqAndPositions) { for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int payloadLength = postings.PayloadLength; if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) payloadBuffer = new byte[payloadLength]; postings.GetPayload(payloadBuffer, 0); } posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); } posConsumer.Finish(); } } } docConsumer.Finish(); return df; }
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer) { int base_Renamed = 0; int readerCount = readers.Count; for (int i = 0; i < readerCount; i++) { IndexReader reader = readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); int[] docMap = smi.GetDocMap(); if (docMap != null) { if (docMaps == null) { docMaps = new int[readerCount][]; delCounts = new int[readerCount]; } docMaps[i] = docMap; delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs(); } base_Renamed += reader.NumDocs(); System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount); if (smi.Next()) queue.Add(smi); // initialize queue else smi.Dispose(); } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; System.String currentField = null; FormatPostingsTermsConsumer termsConsumer = null; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = queue.Pop(); top = queue.Top(); } if ((System.Object) currentField != (System.Object) term.Field) { currentField = term.Field; if (termsConsumer != null) termsConsumer.Finish(); FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField); termsConsumer = consumer.AddField(fieldInfo); omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; } int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort.Work(df / 3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) queue.Add(smi); // restore queue else smi.Dispose(); // done with a segment } } }
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer) { int base_Renamed = 0; int readerCount = readers.Count; for (int i = 0; i < readerCount; i++) { IndexReader reader = readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); int[] docMap = smi.GetDocMap(); if (docMap != null) { if (docMaps == null) { docMaps = new int[readerCount][]; delCounts = new int[readerCount]; } docMaps[i] = docMap; delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs(); } base_Renamed += reader.NumDocs(); System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount); if (smi.Next()) { queue.Add(smi); } // initialize queue else { smi.Dispose(); } } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; System.String currentField = null; FormatPostingsTermsConsumer termsConsumer = null; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = queue.Pop(); top = queue.Top(); } if ((System.Object)currentField != (System.Object)term.Field) { currentField = term.Field; if (termsConsumer != null) { termsConsumer.Finish(); } FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField); termsConsumer = consumer.AddField(fieldInfo); omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; } int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort.Work(df / 3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) { queue.Add(smi); } // restore queue else { smi.Dispose(); // done with a segment } } } }
/// <summary>Optimized implementation. </summary> public virtual int Read(int[] docs, int[] freqs) { while (true) { while (current == null) { if (pointer < readers.Length) { // try next segment if (tenum != null) { smi = tenum.matchingSegments[matchingSegmentPos++]; if (smi == null) { pointer = readers.Length; return 0; } pointer = smi.ord; } base_Renamed = starts[pointer]; current = TermDocs(pointer++); } else { return 0; } } int end = current.Read(docs, freqs); if (end == 0) { // none left in segment current = null; } else { // got some int b = base_Renamed; // adjust doc numbers for (int i = 0; i < end; i++) docs[i] += b; return end; } } }
public virtual bool Next() { for (; ; ) { if (current != null && current.Next()) { return true; } else if (pointer < readers.Length) { if (tenum != null) { smi = tenum.matchingSegments[matchingSegmentPos++]; if (smi == null) { pointer = readers.Length; return false; } pointer = smi.ord; } base_Renamed = starts[pointer]; current = TermDocs(pointer++); } else { return false; } } }
public virtual void Seek(Term term) { this.term = term; this.base_Renamed = 0; this.pointer = 0; this.current = null; this.tenum = null; this.smi = null; this.matchingSegmentPos = 0; }
internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t) { this.topReader = topReader; queue = new SegmentMergeQueue(readers.Length); matchingSegments = new SegmentMergeInfo[readers.Length + 1]; for (int i = 0; i < readers.Length; i++) { IndexReader reader = readers[i]; TermEnum termEnum = t != null ? reader.Terms(t) : reader.Terms(); var smi = new SegmentMergeInfo(starts[i], termEnum, reader) {ord = i}; if (t == null?smi.Next():termEnum.Term != null) queue.Add(smi); // initialize queue else smi.Dispose(); } if (t != null && queue.Size() > 0) { Next(); } }