public override void FinishTerm(BytesRef text, TermStats stats)
{
    if (Debugging.AssertsEnabled) { Debugging.Assert(stats.DocFreq > 0); }
    //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);

    bool isIndexTerm = fieldIndexWriter.CheckIndexTerm(text, stats);

    if (isIndexTerm)
    {
        if (pendingCount > 0)
        {
            // Instead of writing each term, live, we gather terms
            // in RAM in a pending buffer, and then write the
            // entire block in between index terms:
            FlushBlock();
        }
        fieldIndexWriter.Add(text, stats, outerInstance.m_output.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        //System.out.println("  index term!");
    }

    if (pendingTerms.Length == pendingCount)
    {
        TermEntry[] newArray = new TermEntry[ArrayUtil.Oversize(pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
        System.Array.Copy(pendingTerms, 0, newArray, 0, pendingCount);
        for (int i = pendingCount; i < newArray.Length; i++)
        {
            newArray[i] = new TermEntry();
        }
        pendingTerms = newArray;
    }

    TermEntry te = pendingTerms[pendingCount];
    te.Term.CopyBytes(text);
    te.State = postingsWriter.NewTermState();
    te.State.DocFreq = stats.DocFreq;
    te.State.TotalTermFreq = stats.TotalTermFreq;
    postingsWriter.FinishTerm(te.State);

    pendingCount++;
    numTerms++;
}
public override void FinishTerm(BytesRef text, TermStats stats)
{
    Debug.Assert(stats.DocFreq > 0);

    var isIndexTerm = _fieldIndexWriter.CheckIndexTerm(text, stats);

    if (isIndexTerm)
    {
        if (_pendingCount > 0)
        {
            // Instead of writing each term, live, we gather terms
            // in RAM in a pending buffer, and then write the
            // entire block in between index terms:
            FlushBlock();
        }
        _fieldIndexWriter.Add(text, stats, _btw._output.FilePointer);
    }

    if (_pendingTerms.Length == _pendingCount)
    {
        var newArray = new TermEntry[ArrayUtil.Oversize(_pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
        Array.Copy(_pendingTerms, 0, newArray, 0, _pendingCount);
        for (var i = _pendingCount; i < newArray.Length; i++)
        {
            newArray[i] = new TermEntry();
        }
        _pendingTerms = newArray;
    }

    var te = _pendingTerms[_pendingCount];
    te.Term.CopyBytes(text);
    te.State = _postingsWriter.NewTermState();
    te.State.DocFreq = stats.DocFreq;
    te.State.TotalTermFreq = stats.TotalTermFreq;
    _postingsWriter.FinishTerm(te.State);

    _pendingCount++;
    _numTerms++;
}
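// Note (not part of the Lucene.NET source): both FinishTerm variants above grow the
// pending-term buffer with the same amortized pattern -- when the array is full, a larger
// one is allocated (ArrayUtil.Oversize picks the new size), the existing entries are
// copied over, and the new tail slots are pre-filled with fresh TermEntry objects so
// every slot can be reused as scratch state across blocks. A minimal stand-alone sketch
// of that pattern, with hypothetical names in place of the Lucene.NET types:

using System;

internal sealed class PendingBuffer<T> where T : new()
{
    private T[] _items = new T[4];
    private int _count;

    public PendingBuffer()
    {
        // Pre-fill so every slot holds a reusable entry from the start.
        for (int i = 0; i < _items.Length; i++) _items[i] = new T();
    }

    // Returns a reusable slot for the next pending entry, growing the array if needed.
    public T NextSlot()
    {
        if (_count == _items.Length)
        {
            // Grow roughly 1.5x (a stand-in for ArrayUtil.Oversize), keep the existing
            // entries, and pre-allocate the new tail so no slot is ever null.
            var newArray = new T[_items.Length + (_items.Length >> 1) + 1];
            Array.Copy(_items, 0, newArray, 0, _count);
            for (int i = _count; i < newArray.Length; i++) newArray[i] = new T();
            _items = newArray;
        }
        return _items[_count++];
    }

    public void Reset() => _count = 0; // entries stay allocated for the next block
}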
/// <summary>
/// Called when we are done adding docs to this term
/// </summary>
/// <param name="state"></param>
public override void FinishTerm(BlockTermState state)
{
    var state2 = (PulsingTermState)state;
    Debug.Assert(_pendingCount > 0 || _pendingCount == -1);

    if (_pendingCount == -1)
    {
        state2.wrappedState.DocFreq = state2.DocFreq;
        state2.wrappedState.TotalTermFreq = state2.TotalTermFreq;
        state2.bytes = null;
        _wrappedPostingsWriter.FinishTerm(state2.wrappedState);
    }
    else
    {
        // There were few enough total occurrences for this
        // term, so we fully inline our postings data into
        // terms dict, now:

        // TODO: it'd be better to share this encoding logic
        // in some inner codec that knows how to write a
        // single doc / single position, etc. This way if a
        // given codec wants to store other interesting
        // stuff, it could use this pulsing codec to do so

        if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
        {
            var lastDocID = 0;
            var pendingIDX = 0;
            var lastPayloadLength = -1;
            var lastOffsetLength = -1;
            while (pendingIDX < _pendingCount)
            {
                var doc = _pending[pendingIDX];

                var delta = doc.docID - lastDocID;
                lastDocID = doc.docID;

                // if (DEBUG) System.out.println("  write doc=" + doc.docID + " freq=" + doc.termFreq);

                if (doc.termFreq == 1)
                {
                    _buffer.WriteVInt32((delta << 1) | 1);
                }
                else
                {
                    _buffer.WriteVInt32(delta << 1);
                    _buffer.WriteVInt32(doc.termFreq);
                }

                var lastPos = 0;
                var lastOffset = 0;
                for (var posIDX = 0; posIDX < doc.termFreq; posIDX++)
                {
                    var pos = _pending[pendingIDX++];
                    Debug.Assert(pos.docID == doc.docID);

                    var posDelta = pos.pos - lastPos;
                    lastPos = pos.pos;

                    var payloadLength = pos.payload == null ? 0 : pos.payload.Length;
                    if (_storePayloads)
                    {
                        if (payloadLength != lastPayloadLength)
                        {
                            _buffer.WriteVInt32((posDelta << 1) | 1);
                            _buffer.WriteVInt32(payloadLength);
                            lastPayloadLength = payloadLength;
                        }
                        else
                        {
                            _buffer.WriteVInt32(posDelta << 1);
                        }
                    }
                    else
                    {
                        _buffer.WriteVInt32(posDelta);
                    }

                    if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
                    {
                        //System.out.println("write=" + pos.startOffset + "," + pos.endOffset);
                        var offsetDelta = pos.startOffset - lastOffset;
                        var offsetLength = pos.endOffset - pos.startOffset;
                        if (offsetLength != lastOffsetLength)
                        {
                            _buffer.WriteVInt32(offsetDelta << 1 | 1);
                            _buffer.WriteVInt32(offsetLength);
                        }
                        else
                        {
                            _buffer.WriteVInt32(offsetDelta << 1);
                        }
                        lastOffset = pos.startOffset;
                        lastOffsetLength = offsetLength;
                    }

                    if (payloadLength > 0)
                    {
                        Debug.Assert(_storePayloads);
                        _buffer.WriteBytes(pos.payload.Bytes, 0, pos.payload.Length);
                    }
                }
            }
        }
        else if (_indexOptions == IndexOptions.DOCS_AND_FREQS)
        {
            int lastDocId = 0;
            for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
            {
                Position doc = _pending[posIdx];
                int delta = doc.docID - lastDocId;
                Debug.Assert(doc.termFreq != 0);

                if (doc.termFreq == 1)
                {
                    _buffer.WriteVInt32((delta << 1) | 1);
                }
                else
                {
                    _buffer.WriteVInt32(delta << 1);
                    _buffer.WriteVInt32(doc.termFreq);
                }
                lastDocId = doc.docID;
            }
        }
        else if (_indexOptions == IndexOptions.DOCS_ONLY)
        {
            int lastDocId = 0;
            for (int posIdx = 0; posIdx < _pendingCount; posIdx++)
            {
                Position doc = _pending[posIdx];
                _buffer.WriteVInt32(doc.docID - lastDocId);
                lastDocId = doc.docID;
            }
        }

        state2.bytes = new byte[(int)_buffer.GetFilePointer()];
        _buffer.WriteTo(state2.bytes, 0);
        _buffer.Reset();
    }

    _pendingCount = 0;
}
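// Note (not part of the Lucene.NET source): the (delta << 1) | 1 writes above pack the
// "termFreq == 1" flag into the low bit of the doc delta, so the common single-occurrence
// case needs no extra VInt for the frequency. A minimal round-trip sketch of that encoding
// (plain C#, using a List<int> in place of the VInt-encoded byte buffer):

using System.Collections.Generic;

internal static class DocDeltaCodec
{
    public static void Write(List<int> output, int docDelta, int termFreq)
    {
        if (termFreq == 1)
        {
            output.Add((docDelta << 1) | 1); // low bit set: freq is implicitly 1
        }
        else
        {
            output.Add(docDelta << 1);       // low bit clear: freq follows
            output.Add(termFreq);
        }
    }

    public static (int DocDelta, int TermFreq) Read(IReadOnlyList<int> input, ref int pos)
    {
        int code = input[pos++];
        int delta = (int)((uint)code >> 1);  // unsigned shift recovers the delta
        int freq = (code & 1) != 0 ? 1 : input[pos++];
        return (delta, freq);
    }
}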