/** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to * * the real term vectors files in the Directory. */ internal override void finish() { System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start")); int numPostings = termsHashPerField.numPostings; System.Diagnostics.Debug.Assert(numPostings >= 0); if (!doVectors || numPostings == 0) { return; } if (numPostings > maxNumPostings) { maxNumPostings = numPostings; } IndexOutput tvf = perThread.doc.tvf; // This is called once, after inverting all occurences // of a given field in the doc. At this point we flush // our hash into the DocWriter. System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector); System.Diagnostics.Debug.Assert(perThread.vectorFieldsInOrder(fieldInfo)); perThread.doc.addField(termsHashPerField.fieldInfo.number); RawPostingList[] postings = termsHashPerField.sortPostings(); tvf.WriteVInt(numPostings); byte bits = 0x0; if (doVectorPositions) { bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; } if (doVectorOffsets) { bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; } tvf.WriteByte(bits); int encoderUpto = 0; int lastTermBytesCount = 0; ByteSliceReader reader = perThread.vectorSliceReader; char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers; for (int j = 0; j < numPostings; j++) { TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)postings[j]; int freq = posting.freq; char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK; // We swap between two encoders to save copying // last Term's byte array UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto]; // TODO: we could do this incrementally UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result); int termBytesCount = utf8Result.length; // TODO: UTF16toUTF8 could tell us this prefix // Compute common prefix between last term and // this term int prefix = 0; if (j > 0) { byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result; byte[] termBytes = perThread.utf8Results[encoderUpto].result; while (prefix < lastTermBytesCount && prefix < termBytesCount) { if (lastTermBytes[prefix] != termBytes[prefix]) { break; } prefix++; } } encoderUpto = 1 - encoderUpto; lastTermBytesCount = termBytesCount; int suffix = termBytesCount - prefix; tvf.WriteVInt(prefix); tvf.WriteVInt(suffix); tvf.WriteBytes(utf8Result.result, prefix, suffix); tvf.WriteVInt(freq); if (doVectorPositions) { termsHashPerField.initReader(reader, posting, 0); reader.WriteTo(tvf); } if (doVectorOffsets) { termsHashPerField.initReader(reader, posting, 1); reader.WriteTo(tvf); } } termsHashPerField.reset(); perThread.termsHashPerThread.reset(false); }