public MultiTermEnum(IndexReader[] readers, int[] starts, Term t) { queue = new SegmentMergeQueue(readers.Length); for (int i = 0; i < readers.Length; i++) { IndexReader reader = readers[i]; TermEnum termEnum; if (t != null) { termEnum = reader.Terms(t); } else { termEnum = reader.Terms(); } SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader); if (t == null?smi.Next():termEnum.Term() != null) { queue.Put(smi); } // initialize queue else { smi.Close(); } } if (t != null && queue.Size() > 0) { Next(); } }
public override bool Next() { SegmentMergeInfo top = (SegmentMergeInfo)queue.Top(); if (top == null) { term = null; return(false); } term = top.term; docFreq = 0; while (top != null && term.CompareTo(top.term) == 0) { queue.Pop(); docFreq += top.termEnum.DocFreq(); // increment freq if (top.Next()) { queue.Put(top); } // restore queue else { top.Close(); // done with a segment } top = (SegmentMergeInfo)queue.Top(); } return(true); }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> private int AppendPostings(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term ResetSkip(); for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space if (doc < lastDoc) { throw new System.SystemException("docs out of order (" + doc + " < " + lastDoc + " )"); } df++; if ((df % skipInterval) == 0) { BufferSkip(lastDoc); } int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 lastDoc = doc; int freq = postings.Freq(); if (freq == 1) { freqOutput.WriteVInt(docCode | 1); // write doc & freq=1 } else { freqOutput.WriteVInt(docCode); // write doc freqOutput.WriteVInt(freq); // write frequency in doc } int lastPosition = 0; // write position deltas for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); proxOutput.WriteVInt(position - lastPosition); lastPosition = position; } } } return(df); }
private void MergeTermInfos() { int base_Renamed = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); base_Renamed += reader.NumDocs(); if (smi.Next()) { queue.Put(smi); } // initialize queue else { smi.Close(); } } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = (SegmentMergeInfo)queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = (SegmentMergeInfo)queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = (SegmentMergeInfo)queue.Pop(); top = (SegmentMergeInfo)queue.Top(); } int df = MergeTermInfo(match, matchSize); // add new TermInfo if (checkAbort != null) { checkAbort.Work(df / 3.0); } while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) { queue.Put(smi); } // restore queue else { smi.Close(); // done with a segment } } } }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n) { FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text); int df = 0; for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { df++; int doc = postings.Doc; if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space int freq = postings.Freq; FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq); if (!omitTermFreqAndPositions) { for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int payloadLength = postings.PayloadLength; if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) { payloadBuffer = new byte[payloadLength]; } postings.GetPayload(payloadBuffer, 0); } posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); } posConsumer.Finish(); } } } docConsumer.Finish(); return(df); }
public override bool LessThan(System.Object a, System.Object b) { SegmentMergeInfo stiA = (SegmentMergeInfo)a; SegmentMergeInfo stiB = (SegmentMergeInfo)b; int comparison = stiA.term.CompareTo(stiB.term); if (comparison == 0) { return(stiA.base_Renamed < stiB.base_Renamed); } else { return(comparison < 0); } }
/// <summary> /// Process postings from multiple segments without tf, all positioned on the same term. /// Writes out merged entries only into freqOutput, proxOut is not written. /// </summary> /// <param name="smis">smis array of segments</param> /// <param name="n">number of cells in the array actually occupied</param> /// <returns></returns> private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.ResetSkip(); int lastPayloadLength = -1; // ensures that we write the first length for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space if (doc < 0 || (df > 0 && doc <= lastDoc)) { throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )"); } df++; if ((df % skipInterval) == 0) { skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength); skipListWriter.BufferSkip(df); } int docCode = (doc - lastDoc); lastDoc = doc; freqOutput.WriteVInt(docCode); // write doc & freq=1 } } return(df); }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.ResetSkip(); bool storePayloads = fieldInfos.FieldInfo(smis[0].term.field).storePayloads; int lastPayloadLength = -1; // ensures that we write the first length for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space if (doc < 0 || (df > 0 && doc <= lastDoc)) { throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )"); } df++; if ((df % skipInterval) == 0) { skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength); skipListWriter.BufferSkip(df); } int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 lastDoc = doc; int freq = postings.Freq(); if (freq == 1) { freqOutput.WriteVInt(docCode | 1); // write doc & freq=1 } else { freqOutput.WriteVInt(docCode); // write doc freqOutput.WriteVInt(freq); // write frequency in doc } /** See {@link DocumentWriter#writePostings(Posting[], String) for * documentation about the encoding of positions and payloads */ int lastPosition = 0; // write position deltas for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int delta = position - lastPosition; if (storePayloads) { int payloadLength = postings.GetPayloadLength(); if (payloadLength == lastPayloadLength) { proxOutput.WriteVInt(delta * 2); } else { proxOutput.WriteVInt(delta * 2 + 1); proxOutput.WriteVInt(payloadLength); lastPayloadLength = payloadLength; } if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) { payloadBuffer = new byte[payloadLength]; } postings.GetPayload(payloadBuffer, 0); proxOutput.WriteBytes(payloadBuffer, 0, payloadLength); } } else { proxOutput.WriteVInt(delta); } lastPosition = position; } } } return(df); }
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer) { int base_Renamed = 0; int readerCount = readers.Count; for (int i = 0; i < readerCount; i++) { IndexReader reader = readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); int[] docMap = smi.GetDocMap(); if (docMap != null) { if (docMaps == null) { docMaps = new int[readerCount][]; delCounts = new int[readerCount]; } docMaps[i] = docMap; delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs(); } base_Renamed += reader.NumDocs(); System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount); if (smi.Next()) { queue.Add(smi); } // initialize queue else { smi.Dispose(); } } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; System.String currentField = null; FormatPostingsTermsConsumer termsConsumer = null; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = queue.Pop(); top = queue.Top(); } if ((System.Object)currentField != (System.Object)term.Field) { currentField = term.Field; if (termsConsumer != null) { termsConsumer.Finish(); } FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField); termsConsumer = consumer.AddField(fieldInfo); omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; } int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort.Work(df / 3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) { queue.Add(smi); } // restore queue else { smi.Dispose(); // done with a segment } } } }
/// <summary> /// Process postings from multiple segments without tf, all positioned on the same term. /// Writes out merged entries only into freqOutput, proxOut is not written. /// </summary> /// <param name="smis">smis array of segments</param> /// <param name="n">number of cells in the array actually occupied</param> /// <returns></returns> private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.ResetSkip(); int lastPayloadLength = -1; // ensures that we write the first length for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) doc = docMap[doc]; // map around deletions doc += base_Renamed; // convert to merged space if (doc < 0 || (df > 0 && doc <= lastDoc)) throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )"); df++; if ((df % skipInterval) == 0) { skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength); skipListWriter.BufferSkip(df); } int docCode = (doc - lastDoc); lastDoc = doc; freqOutput.WriteVInt(docCode); // write doc & freq=1 } } return df; }
/// <summary>Merge one term found in one or more segments. The array <code>smis</code> /// contains segments that are positioned at the same term. <code>N</code> /// is the number of cells in the array actually occupied. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int MergeTermInfo(SegmentMergeInfo[] smis, int n) { long freqPointer = freqOutput.GetFilePointer(); long proxPointer; if (proxOutput != null) proxPointer = proxOutput.GetFilePointer(); else proxPointer = 0; int df; if (fieldInfos.FieldInfo(smis[0].term.field).omitTf) { // append posting data df = AppendPostingsNoTf(smis, n); } else { df = AppendPostings(smis, n); } long skipPointer = skipListWriter.WriteSkip(freqOutput); if (df > 0) { // add an entry to the dictionary with pointers to prox and freq files termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); termInfosWriter.Add(smis[0].term, termInfo); } return df; }
public MultiTermEnum(IndexReader[] readers, int[] starts, Term t) { queue = new SegmentMergeQueue(readers.Length); for (int i = 0; i < readers.Length; i++) { IndexReader reader = readers[i]; TermEnum termEnum; if (t != null) { termEnum = reader.Terms(t); } else termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader); if (t == null?smi.Next():termEnum.Term() != null) queue.Put(smi); // initialize queue else smi.Close(); } if (t != null && queue.Size() > 0) { Next(); } }
/// <summary>Optimized implementation. </summary> public virtual int Read(int[] docs, int[] freqs) { while (true) { while (current == null) { if (pointer < readers.Length) { // try next segment if (tenum != null) { smi = tenum.matchingSegments[matchingSegmentPos++]; if (smi == null) { pointer = readers.Length; return 0; } pointer = smi.ord; } base_Renamed = starts[pointer]; current = TermDocs(pointer++); } else { return 0; } } int end = current.Read(docs, freqs); if (end == 0) { // none left in segment current = null; } else { // got some int b = base_Renamed; // adjust doc numbers for (int i = 0; i < end; i++) docs[i] += b; return end; } } }
private void MergeTermInfos() { int base_Renamed = 0; int readerCount = readers.Count; for (int i = 0; i < readerCount; i++) { IndexReader reader = (IndexReader) readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); int[] docMap = smi.GetDocMap(); if (docMap != null) { if (docMaps == null) { docMaps = new int[readerCount][]; delCounts = new int[readerCount]; } docMaps[i] = docMap; delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs(); } base_Renamed += reader.NumDocs(); if (smi.Next()) queue.Put(smi);// initialize queue else smi.Close(); } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = (SegmentMergeInfo) queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = (SegmentMergeInfo) queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = (SegmentMergeInfo) queue.Pop(); top = (SegmentMergeInfo) queue.Top(); } int df = MergeTermInfo(match, matchSize); // add new TermInfo if (checkAbort != null) checkAbort.Work(df / 3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) queue.Put(smi); // restore queue else smi.Close(); // done with a segment } } }
public virtual void Seek(Term term) { this.term = term; this.base_Renamed = 0; this.pointer = 0; this.current = null; this.tenum = null; this.smi = null; this.matchingSegmentPos = 0; }
public virtual bool Next() { for (; ; ) { if (current != null && current.Next()) { return true; } else if (pointer < readers.Length) { if (tenum != null) { smi = tenum.matchingSegments[matchingSegmentPos++]; if (smi == null) { pointer = readers.Length; return false; } pointer = smi.ord; } base_Renamed = starts[pointer]; current = TermDocs(pointer++); } else { return false; } } }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.ResetSkip(); bool storePayloads = fieldInfos.FieldInfo(smis[0].term.field).storePayloads; int lastPayloadLength = - 1; // ensures that we write the first length for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) doc = docMap[doc]; // map around deletions doc += base_Renamed; // convert to merged space if (doc < 0 || (df > 0 && doc <= lastDoc)) throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )"); df++; if ((df % skipInterval) == 0) { skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength); skipListWriter.BufferSkip(df); } int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 lastDoc = doc; int freq = postings.Freq(); if (freq == 1) { freqOutput.WriteVInt(docCode | 1); // write doc & freq=1 } else { freqOutput.WriteVInt(docCode); // write doc freqOutput.WriteVInt(freq); // write frequency in doc } /** See {@link DocumentWriter#writePostings(Posting[], String)} for * documentation about the encoding of positions and payloads */ int lastPosition = 0; // write position deltas for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int delta = position - lastPosition; if (storePayloads) { int payloadLength = postings.GetPayloadLength(); if (payloadLength == lastPayloadLength) { proxOutput.WriteVInt(delta * 2); } else { proxOutput.WriteVInt(delta * 2 + 1); proxOutput.WriteVInt(payloadLength); lastPayloadLength = payloadLength; } if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) { payloadBuffer = new byte[payloadLength]; } postings.GetPayload(payloadBuffer, 0); proxOutput.WriteBytes(payloadBuffer, 0, payloadLength); } } else { proxOutput.WriteVInt(delta); } lastPosition = position; } } } return df; }
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer) { int base_Renamed = 0; int readerCount = readers.Count; for (int i = 0; i < readerCount; i++) { IndexReader reader = (IndexReader) readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); int[] docMap = smi.GetDocMap(); if (docMap != null) { if (docMaps == null) { docMaps = new int[readerCount][]; delCounts = new int[readerCount]; } docMaps[i] = docMap; delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs(); } base_Renamed += reader.NumDocs(); System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount); if (smi.Next()) queue.Add(smi); // initialize queue else smi.Close(); } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; System.String currentField = null; FormatPostingsTermsConsumer termsConsumer = null; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = (SegmentMergeInfo) queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = (SegmentMergeInfo) queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = (SegmentMergeInfo) queue.Pop(); top = (SegmentMergeInfo) queue.Top(); } if ((System.Object) currentField != (System.Object) term.field) { currentField = term.field; if (termsConsumer != null) termsConsumer.Finish(); FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField); termsConsumer = consumer.AddField(fieldInfo); omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; } int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort.Work(df / 3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) queue.Add(smi); // restore queue else smi.Close(); // done with a segment } } }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> private int AppendPostings(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term ResetSkip(); for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) doc = docMap[doc]; // map around deletions doc += base_Renamed; // convert to merged space if (doc < lastDoc) throw new System.SystemException("docs out of order (" + doc + " < " + lastDoc + " )"); df++; if ((df % skipInterval) == 0) { BufferSkip(lastDoc); } int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 lastDoc = doc; int freq = postings.Freq(); if (freq == 1) { freqOutput.WriteVInt(docCode | 1); // write doc & freq=1 } else { freqOutput.WriteVInt(docCode); // write doc freqOutput.WriteVInt(freq); // write frequency in doc } int lastPosition = 0; // write position deltas for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); proxOutput.WriteVInt(position - lastPosition); lastPosition = position; } } } return df; }
private void MergeTermInfos() { int base_Renamed = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader) readers[i]; TermEnum termEnum = reader.Terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); base_Renamed += reader.NumDocs(); if (smi.Next()) queue.Put(smi); // initialize queue else smi.Close(); } SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; while (queue.Size() > 0) { int matchSize = 0; // pop matching terms match[matchSize++] = (SegmentMergeInfo) queue.Pop(); Term term = match[0].term; SegmentMergeInfo top = (SegmentMergeInfo) queue.Top(); while (top != null && term.CompareTo(top.term) == 0) { match[matchSize++] = (SegmentMergeInfo) queue.Pop(); top = (SegmentMergeInfo) queue.Top(); } MergeTermInfo(match, matchSize); // add new TermInfo while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.Next()) queue.Put(smi); // restore queue else smi.Close(); // done with a segment } } }
/// <summary>Merge one term found in one or more segments. The array <code>smis</code> /// contains segments that are positioned at the same term. <code>N</code> /// is the number of cells in the array actually occupied. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> private void MergeTermInfo(SegmentMergeInfo[] smis, int n) { long freqPointer = freqOutput.GetFilePointer(); long proxPointer = proxOutput.GetFilePointer(); int df = AppendPostings(smis, n); // append posting data long skipPointer = WriteSkip(); if (df > 0) { // add an entry to the dictionary with pointers to prox and freq files termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); termInfosWriter.Add(smis[0].term, termInfo); } }
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n) { FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.text); int df = 0; for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { df++; int doc = postings.Doc(); if (docMap != null) doc = docMap[doc]; // map around deletions doc += base_Renamed; // convert to merged space int freq = postings.Freq(); FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq); if (!omitTermFreqAndPositions) { for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int payloadLength = postings.GetPayloadLength(); if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) payloadBuffer = new byte[payloadLength]; postings.GetPayload(payloadBuffer, 0); } posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); } posConsumer.Finish(); } } } docConsumer.Finish(); return df; }
internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t) { this.topReader = topReader; queue = new SegmentMergeQueue(readers.Length); matchingSegments = new SegmentMergeInfo[readers.Length + 1]; for (int i = 0; i < readers.Length; i++) { IndexReader reader = readers[i]; TermEnum termEnum = t != null ? reader.Terms(t) : reader.Terms(); var smi = new SegmentMergeInfo(starts[i], termEnum, reader) {ord = i}; if (t == null?smi.Next():termEnum.Term != null) queue.Add(smi); // initialize queue else smi.Dispose(); } if (t != null && queue.Size() > 0) { Next(); } }