/// <summary>
/// Processes postings from multiple segments that are all positioned on the
/// same term. Every document (and, unless omitTermFreqAndPositions is set,
/// every position with its payload) is forwarded to the consumers obtained
/// from <paramref name="termsConsumer"/>.
/// </summary>
/// <param name="termsConsumer">consumer that receives the term text via AddTerm</param>
/// <param name="smis">array of segments</param>
/// <param name="n">number of cells in the array actually occupied</param>
/// <returns>number of documents across all segments where this term was found</returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
{
    // All occupied entries share the same term, so the first one supplies the text.
    FormatPostingsDocsConsumer docsOut = termsConsumer.AddTerm(smis[0].term.text);
    int docCount = 0;

    for (int seg = 0; seg < n; seg++)
    {
        SegmentMergeInfo info = smis[seg];
        TermPositions positions = info.GetPositions();
        System.Diagnostics.Debug.Assert(positions != null);
        int docBase = info.base_Renamed;
        int[] remap = info.GetDocMap();
        positions.Seek(info.termEnum);

        while (positions.Next())
        {
            docCount++;

            int mergedDoc = positions.Doc();
            if (remap != null)
            {
                // map around deletions
                mergedDoc = remap[mergedDoc];
            }
            // convert to merged space
            mergedDoc += docBase;

            int termFreq = positions.Freq();
            FormatPostingsPositionsConsumer positionsOut = docsOut.AddDoc(mergedDoc, termFreq);

            if (omitTermFreqAndPositions)
            {
                // Positions are not consumed for this field.
                continue;
            }

            for (int k = 0; k < termFreq; k++)
            {
                int pos = positions.NextPosition();
                int payloadLen = positions.GetPayloadLength();
                if (payloadLen > 0)
                {
                    // Grow the shared buffer only when the current payload does not fit.
                    if (payloadBuffer == null || payloadBuffer.Length < payloadLen)
                    {
                        payloadBuffer = new byte[payloadLen];
                    }
                    positions.GetPayload(payloadBuffer, 0);
                }
                positionsOut.AddPosition(pos, payloadBuffer, 0, payloadLen);
            }
            positionsOut.Finish();
        }
    }

    docsOut.Finish();
    return docCount;
}
/// <summary>
/// Compares two SegmentMergeInfo entries by their term; when the terms
/// compare equal, the entry with the smaller base_Renamed is the lesser one.
/// </summary>
/// <param name="a">first entry; cast to SegmentMergeInfo</param>
/// <param name="b">second entry; cast to SegmentMergeInfo</param>
/// <returns>true if <paramref name="a"/> orders before <paramref name="b"/></returns>
public override bool LessThan(System.Object a, System.Object b)
{
    SegmentMergeInfo left = (SegmentMergeInfo) a;
    SegmentMergeInfo right = (SegmentMergeInfo) b;

    int termOrder = left.term.CompareTo(right.term);

    // Equal terms fall back to the base_Renamed of each entry.
    return termOrder != 0
        ? termOrder < 0
        : left.base_Renamed < right.base_Renamed;
}
/// <summary>
/// Optimized implementation. Delegates to the Read of the current
/// per-segment enumerator and adds base_Renamed to every returned document
/// number, moving on to the next segment whenever the current one yields
/// nothing.
/// </summary>
/// <param name="docs">receives the adjusted document numbers</param>
/// <param name="freqs">passed through to the per-segment Read</param>
/// <returns>number of entries read, or 0 when no segment has any left</returns>
public virtual int Read(int[] docs, int[] freqs)
{
    for (; ; )
    {
        while (current == null)
        {
            if (pointer >= readers.Length)
            {
                return 0;
            }

            // try next segment
            if (tenum != null)
            {
                smi = tenum.matchingSegments[matchingSegmentPos++];
                if (smi == null)
                {
                    // End of the null terminated list: mark every reader as consumed.
                    pointer = readers.Length;
                    return 0;
                }
                pointer = smi.ord;
            }

            base_Renamed = starts[pointer];
            current = TermDocs(pointer++);
        }

        int count = current.Read(docs, freqs);
        if (count == 0)
        {
            // none left in segment
            current = null;
            continue;
        }

        // got some: adjust doc numbers
        int offset = base_Renamed;
        for (int k = 0; k < count; k++)
        {
            docs[k] += offset;
        }
        return count;
    }
}
/// <summary>
/// Advances to the next entry, switching to the next segment's enumerator
/// whenever the current one is missing or exhausted.
/// </summary>
/// <returns>true if an entry is available; false once no segment is left</returns>
public virtual bool Next()
{
    while (true)
    {
        if (current != null && current.Next())
        {
            return true;
        }

        if (pointer >= readers.Length)
        {
            return false;
        }

        if (tenum != null)
        {
            smi = tenum.matchingSegments[matchingSegmentPos++];
            if (smi == null)
            {
                // End of the null terminated list: mark every reader as consumed.
                pointer = readers.Length;
                return false;
            }
            pointer = smi.ord;
        }

        base_Renamed = starts[pointer];
        current = TermDocs(pointer++);
    }
}
/// <summary>
/// Remembers <paramref name="term"/> and resets the enumeration state
/// (base, reader pointer, current enumerator, term enum, segment info and
/// matching-segment position) to its initial values.
/// </summary>
/// <param name="term">the term to store</param>
public virtual void Seek(Term term)
{
    this.term = term;

    // Reset all cursor state to its initial values.
    base_Renamed = 0;
    pointer = 0;
    current = null;

    // Drop any term-enum based segment tracking.
    tenum = null;
    smi = null;
    matchingSegmentPos = 0;
}
// null terminated array of matching segments
internal SegmentMergeInfo[] matchingSegments;

/// <summary>
/// Creates one SegmentMergeInfo per reader and queues those that have a
/// term to offer. When <paramref name="t"/> is given, each reader's
/// enumeration is obtained via Terms(t) and Next() is called once at the
/// end if anything was queued.
/// </summary>
/// <param name="topReader">stored in the topReader field</param>
/// <param name="readers">the readers to enumerate</param>
/// <param name="starts">per-reader value passed to each SegmentMergeInfo</param>
/// <param name="t">term handed to each reader's Terms call, or null to use Terms()</param>
public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t)
{
    this.topReader = topReader;
    queue = new SegmentMergeQueue(readers.Length);
    // One extra slot so the array can always be null terminated.
    matchingSegments = new SegmentMergeInfo[readers.Length + 1];

    for (int ord = 0; ord < readers.Length; ord++)
    {
        IndexReader segReader = readers[ord];
        TermEnum segTerms = (t != null) ? segReader.Terms(t) : segReader.Terms();

        SegmentMergeInfo info = new SegmentMergeInfo(starts[ord], segTerms, segReader);
        info.ord = ord;

        bool hasTerm;
        if (t == null)
        {
            hasTerm = info.Next();
        }
        else
        {
            hasTerm = segTerms.Term() != null;
        }

        if (hasTerm)
        {
            // initialize queue
            queue.Put(info);
        }
        else
        {
            info.Close();
        }
    }

    if (t != null && queue.Size() > 0)
    {
        Next();
    }
}
/// <summary>
/// Processes postings from multiple segments that are all positioned on the
/// same term, handing each document and (unless omitTermFreqAndPositions is
/// set) each position and payload to the consumers created from
/// <paramref name="termsConsumer"/>.
/// </summary>
/// <param name="termsConsumer">consumer that receives the term text via AddTerm</param>
/// <param name="smis">array of segments</param>
/// <param name="n">number of cells in the array actually occupied</param>
/// <returns>number of documents across all segments where this term was found</returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
{
    FormatPostingsDocsConsumer termDocsSink = termsConsumer.AddTerm(smis[0].term.text);
    int totalDocs = 0;

    for (int s = 0; s < n; s++)
    {
        SegmentMergeInfo segment = smis[s];
        TermPositions tp = segment.GetPositions();
        System.Diagnostics.Debug.Assert(tp != null);
        int offset = segment.base_Renamed;
        int[] deletionMap = segment.GetDocMap();
        tp.Seek(segment.termEnum);

        while (tp.Next())
        {
            totalDocs++;

            int docId = tp.Doc();
            if (deletionMap != null)
            {
                docId = deletionMap[docId]; // map around deletions
            }
            docId += offset; // convert to merged space

            int occurrences = tp.Freq();
            FormatPostingsPositionsConsumer positionSink = termDocsSink.AddDoc(docId, occurrences);

            if (!omitTermFreqAndPositions)
            {
                int remaining = occurrences;
                while (remaining-- > 0)
                {
                    int where = tp.NextPosition();
                    int size = tp.GetPayloadLength();
                    if (size > 0)
                    {
                        bool tooSmall = payloadBuffer == null || payloadBuffer.Length < size;
                        if (tooSmall)
                        {
                            payloadBuffer = new byte[size];
                        }
                        tp.GetPayload(payloadBuffer, 0);
                    }
                    positionSink.AddPosition(where, payloadBuffer, 0, size);
                }
                positionSink.Finish();
            }
        }
    }

    termDocsSink.Finish();
    return totalDocs;
}
/// <summary>
/// Merges the terms of all readers. A SegmentMergeInfo is created per
/// reader and queued if it has a first term; then, while the queue is not
/// empty, every entry whose term compares equal to the popped entry's term
/// is collected, their postings are written through AppendPostings, and
/// each collected entry is advanced and re-queued (or closed when done).
/// </summary>
/// <param name="consumer">supplies a terms consumer for each field via AddField</param>
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
{
    int docBase = 0;
    int segmentCount = readers.Count;

    for (int idx = 0; idx < segmentCount; idx++)
    {
        IndexReader segReader = (IndexReader) readers[idx];
        TermEnum segTerms = segReader.Terms();
        SegmentMergeInfo info = new SegmentMergeInfo(docBase, segTerms, segReader);

        int[] map = info.GetDocMap();
        if (map != null)
        {
            // Allocate the per-reader tables when the first doc map shows up.
            if (docMaps == null)
            {
                docMaps = new int[segmentCount][];
                delCounts = new int[segmentCount];
            }
            docMaps[idx] = map;
            delCounts[idx] = info.reader.MaxDoc() - info.reader.NumDocs();
        }

        docBase += segReader.NumDocs();

        System.Diagnostics.Debug.Assert(segReader.NumDocs() == segReader.MaxDoc() - info.delCount);

        if (info.Next())
        {
            // initialize queue
            queue.Add(info);
        }
        else
        {
            info.Close();
        }
    }

    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

    System.String currentField = null;
    FormatPostingsTermsConsumer termsConsumer = null;

    while (queue.Size() > 0)
    {
        // pop matching terms
        int matched = 0;
        match[matched++] = (SegmentMergeInfo) queue.Pop();
        Term term = match[0].term;

        for (SegmentMergeInfo head = (SegmentMergeInfo) queue.Top();
             head != null && term.CompareTo(head.term) == 0;
             head = (SegmentMergeInfo) queue.Top())
        {
            match[matched++] = (SegmentMergeInfo) queue.Pop();
        }

        // Reference comparison of the field names (both sides cast to object).
        if ((System.Object) currentField != (System.Object) term.field)
        {
            currentField = term.field;
            if (termsConsumer != null)
            {
                termsConsumer.Finish();
            }
            FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
            termsConsumer = consumer.AddField(fieldInfo);
            omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
        }

        // add new TermInfo
        int df = AppendPostings(termsConsumer, match, matched);

        checkAbort.Work(df / 3.0);

        // Advance the collected entries, last collected first.
        for (int k = matched - 1; k >= 0; k--)
        {
            SegmentMergeInfo entry = match[k];
            if (entry.Next())
            {
                // restore queue
                queue.Add(entry);
            }
            else
            {
                // done with a segment
                entry.Close();
            }
        }
    }
}
/// <summary>
/// Merges the terms of all readers: builds a SegmentMergeInfo for each
/// reader (recording its doc map and deletion count when a doc map exists),
/// queues the ones that have a term, and then repeatedly pops the group of
/// entries positioned on an equal term, appends their postings, and
/// advances each entry back into the queue or closes it.
/// </summary>
/// <param name="consumer">supplies a terms consumer for each field via AddField</param>
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
{
    int runningBase = 0;
    int readerCount = readers.Count;

    for (int r = 0; r < readerCount; r++)
    {
        IndexReader currentReader = (IndexReader)readers[r];
        TermEnum terms = currentReader.Terms();
        SegmentMergeInfo mergeInfo = new SegmentMergeInfo(runningBase, terms, currentReader);

        int[] segDocMap = mergeInfo.GetDocMap();
        if (segDocMap != null)
        {
            if (docMaps == null)
            {
                // First doc map encountered: create the per-reader tables.
                docMaps = new int[readerCount][];
                delCounts = new int[readerCount];
            }
            docMaps[r] = segDocMap;
            delCounts[r] = mergeInfo.reader.MaxDoc() - mergeInfo.reader.NumDocs();
        }

        runningBase += currentReader.NumDocs();

        System.Diagnostics.Debug.Assert(currentReader.NumDocs() == currentReader.MaxDoc() - mergeInfo.delCount);

        bool hasFirstTerm = mergeInfo.Next();
        if (hasFirstTerm)
        {
            queue.Add(mergeInfo); // initialize queue
        }
        else
        {
            mergeInfo.Close();
        }
    }

    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
    System.String currentField = null;
    FormatPostingsTermsConsumer termsConsumer = null;

    while (queue.Size() > 0)
    {
        // pop matching terms
        int groupSize = 0;
        match[groupSize++] = (SegmentMergeInfo)queue.Pop();
        Term groupTerm = match[0].term;

        SegmentMergeInfo next = (SegmentMergeInfo)queue.Top();
        while (next != null && groupTerm.CompareTo(next.term) == 0)
        {
            match[groupSize++] = (SegmentMergeInfo)queue.Pop();
            next = (SegmentMergeInfo)queue.Top();
        }

        // The field names are compared by reference (both cast to object).
        bool fieldChanged = (System.Object)currentField != (System.Object)groupTerm.field;
        if (fieldChanged)
        {
            currentField = groupTerm.field;
            if (termsConsumer != null)
            {
                termsConsumer.Finish();
            }
            FieldInfo info = fieldInfos.FieldInfo(currentField);
            termsConsumer = consumer.AddField(info);
            omitTermFreqAndPositions = info.omitTermFreqAndPositions;
        }

        int df = AppendPostings(termsConsumer, match, groupSize); // add new TermInfo

        checkAbort.Work(df / 3.0);

        // Walk the group backwards, advancing each entry.
        while (groupSize > 0)
        {
            groupSize--;
            SegmentMergeInfo member = match[groupSize];
            if (member.Next())
            {
                queue.Add(member); // restore queue
            }
            else
            {
                member.Close(); // done with a segment
            }
        }
    }
}