/* Walk through all unique text tokens (Posting
 * instances) found in this field and serialize them
 * into a single RAM segment. */
internal void AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer)
{
    int numFields = fields.Length;

    FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];

    for (int i = 0; i < numFields; i++)
    {
        FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);

        System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields[0].fieldInfo); // Should always be true

        bool result = fms.NextTerm();
        System.Diagnostics.Debug.Assert(result);
    }

    FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo);

    FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];

    bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;

    while (numFields > 0)
    {
        // Get the next term to merge: collect every merge state currently
        // positioned on the smallest term.
        termStates[0] = mergeStates[0];
        int numToMerge = 1;

        for (int i = 1; i < numFields; i++)
        {
            char[] text = mergeStates[i].text;
            int textOffset = mergeStates[i].textOffset;
            int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);

            if (cmp < 0)
            {
                termStates[0] = mergeStates[i];
                numToMerge = 1;
            }
            else if (cmp == 0)
            {
                termStates[numToMerge++] = mergeStates[i];
            }
        }

        FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset);

        // Now termStates has numToMerge FieldMergeStates
        // which all share the same term.  Now we must
        // interleave the docID streams.
        while (numToMerge > 0)
        {
            // Find the state with the smallest pending docID for this term.
            FreqProxFieldMergeState minState = termStates[0];
            for (int i = 1; i < numToMerge; i++)
            {
                if (termStates[i].docID < minState.docID)
                {
                    minState = termStates[i];
                }
            }

            int termDocFreq = minState.termFreq;

            FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq);

            ByteSliceReader prox = minState.prox;

            // Carefully copy over the prox + payload info,
            // changing the format to match Lucene's segment
            // format.
            if (!currentFieldOmitTermFreqAndPositions)
            {
                // omitTermFreqAndPositions == false so we do write positions & payload
                int position = 0;
                for (int j = 0; j < termDocFreq; j++)
                {
                    int code = prox.ReadVInt();
                    position += (code >> 1);

                    int payloadLength;
                    if ((code & 1) != 0)
                    {
                        // This position has a payload
                        payloadLength = prox.ReadVInt();

                        if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                        {
                            payloadBuffer = new byte[payloadLength];
                        }

                        prox.ReadBytes(payloadBuffer, 0, payloadLength);
                    }
                    else
                    {
                        payloadLength = 0;
                    }

                    posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
                } // end for

                posConsumer.Finish();
            }

            if (!minState.NextDoc())
            {
                // No more docs for the current term: remove from termStates.
                int upto = 0;
                for (int i = 0; i < numToMerge; i++)
                {
                    if (termStates[i] != minState)
                    {
                        termStates[upto++] = termStates[i];
                    }
                }
                numToMerge--;
                System.Diagnostics.Debug.Assert(upto == numToMerge);

                // Advance this state to the next term
                if (!minState.NextTerm())
                {
                    // OK, no more terms, so remove from mergeStates as well
                    upto = 0;
                    for (int i = 0; i < numFields; i++)
                    {
                        if (mergeStates[i] != minState)
                        {
                            mergeStates[upto++] = mergeStates[i];
                        }
                    }
                    numFields--;
                    System.Diagnostics.Debug.Assert(upto == numFields);
                }
            }
        }

        docConsumer.Finish();
    }

    termsConsumer.Finish();
}
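// The prox-copy loop above decodes positions as VInts of (delta << 1) | hasPayloadBit, with a
// VInt payload length and the payload bytes following whenever the low bit is set. Below is a
// minimal standalone sketch of that encoding (it does not use Lucene.Net's ByteSliceReader or
// any other internal type; all names here are illustrative only).
using System;
using System.IO;

static class ProxEncodingSketch
{
    static void WriteVInt(Stream outStream, int value)
    {
        // 7-bits-per-byte variable-length encoding, matching Lucene's VInt layout.
        uint v = (uint)value;
        while (v >= 0x80)
        {
            outStream.WriteByte((byte)((v & 0x7F) | 0x80));
            v >>= 7;
        }
        outStream.WriteByte((byte)v);
    }

    static int ReadVInt(Stream inStream)
    {
        int value = 0, shift = 0, b;
        do
        {
            b = inStream.ReadByte();
            value |= (b & 0x7F) << shift;
            shift += 7;
        } while ((b & 0x80) != 0);
        return value;
    }

    static void Main()
    {
        var positions = new (int Position, byte[] Payload)[]
        {
            (3, null), (7, new byte[] { 0x2A }), (12, null)
        };

        // Encode: delta-code the positions; the low bit flags a payload.
        var buffer = new MemoryStream();
        int last = 0;
        foreach (var (pos, payload) in positions)
        {
            int delta = pos - last;
            last = pos;
            int code = (delta << 1) | (payload != null ? 1 : 0);
            WriteVInt(buffer, code);
            if (payload != null)
            {
                WriteVInt(buffer, payload.Length);
                buffer.Write(payload, 0, payload.Length);
            }
        }

        // Decode, mirroring the loop in AppendPostings above.
        buffer.Position = 0;
        int position = 0;
        for (int j = 0; j < positions.Length; j++)
        {
            int code = ReadVInt(buffer);
            position += code >> 1;
            int payloadLength = (code & 1) != 0 ? ReadVInt(buffer) : 0;
            buffer.Position += payloadLength; // skip the payload bytes in this sketch
            Console.WriteLine($"position={position} payloadLength={payloadLength}");
        }
    }
}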
private void MergeTermInfos(FormatPostingsFieldsConsumer consumer)
{
    int base_Renamed = 0;
    int readerCount = readers.Count;
    for (int i = 0; i < readerCount; i++)
    {
        IndexReader reader = readers[i];
        TermEnum termEnum = reader.Terms();
        SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
        int[] docMap = smi.GetDocMap();
        if (docMap != null)
        {
            if (docMaps == null)
            {
                docMaps = new int[readerCount][];
                delCounts = new int[readerCount];
            }
            docMaps[i] = docMap;
            delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
        }

        base_Renamed += reader.NumDocs();

        System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);

        if (smi.Next())
        {
            queue.Add(smi); // initialize queue
        }
        else
        {
            smi.Dispose();
        }
    }

    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

    System.String currentField = null;
    FormatPostingsTermsConsumer termsConsumer = null;

    while (queue.Size() > 0)
    {
        int matchSize = 0;

        // pop matching terms
        match[matchSize++] = queue.Pop();
        Term term = match[0].term;
        SegmentMergeInfo top = queue.Top();

        while (top != null && term.CompareTo(top.term) == 0)
        {
            match[matchSize++] = queue.Pop();
            top = queue.Top();
        }

        if ((System.Object)currentField != (System.Object)term.Field)
        {
            currentField = term.Field;
            if (termsConsumer != null)
            {
                termsConsumer.Finish();
            }
            FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
            termsConsumer = consumer.AddField(fieldInfo);
            omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
        }

        int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo

        checkAbort.Work(df / 3.0);

        while (matchSize > 0)
        {
            SegmentMergeInfo smi = match[--matchSize];
            if (smi.Next())
            {
                queue.Add(smi); // restore queue
            }
            else
            {
                smi.Dispose(); // done with a segment
            }
        }
    }
}
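// MergeTermInfos drives a k-way merge over per-segment term enumerators: every cursor sitting on
// the smallest term is popped from the queue, those cursors are merged together, and each one that
// still has terms is pushed back. Below is a minimal standalone sketch of that queue discipline
// (hypothetical types, assuming the .NET 6+ PriorityQueue; it is not Lucene.Net's SegmentMergeQueue).
using System;
using System.Collections.Generic;

class SegmentCursorSketch
{
    private readonly IEnumerator<string> terms;
    public string Current { get; private set; }
    public string Name { get; }

    public SegmentCursorSketch(string name, IEnumerable<string> sortedTerms)
    {
        Name = name;
        terms = sortedTerms.GetEnumerator();
    }

    // Advance to the next term; false means this segment is exhausted.
    public bool Next()
    {
        if (!terms.MoveNext()) return false;
        Current = terms.Current;
        return true;
    }
}

static class TermMergeSketch
{
    static void Main()
    {
        var segments = new[]
        {
            new SegmentCursorSketch("seg0", new[] { "apple", "banana", "cherry" }),
            new SegmentCursorSketch("seg1", new[] { "banana", "date" }),
            new SegmentCursorSketch("seg2", new[] { "apple", "date" }),
        };

        var queue = new PriorityQueue<SegmentCursorSketch, string>(StringComparer.Ordinal);
        foreach (var seg in segments)
        {
            if (seg.Next()) queue.Enqueue(seg, seg.Current); // initialize queue
        }

        var match = new List<SegmentCursorSketch>();
        while (queue.Count > 0)
        {
            match.Clear();

            // Pop every cursor positioned on the smallest term.
            match.Add(queue.Dequeue());
            string term = match[0].Current;
            while (queue.TryPeek(out var top, out _) && top.Current == term)
            {
                match.Add(queue.Dequeue());
            }

            // Here the real code would append the merged postings for `term`.
            Console.WriteLine($"{term}: merged from {string.Join(", ", match.ConvertAll(m => m.Name))}");

            // Restore the queue with cursors that still have terms.
            foreach (var smi in match)
            {
                if (smi.Next()) queue.Enqueue(smi, smi.Current);
            }
        }
    }
}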