// Process all occurrences of a single field in one doc;
// count is 1 if a given field occurs only once in the
// Document, which is the "typical" case
internal override void ProcessFields(Fieldable[] fields, int count)
{
    StoredFieldsWriter.PerDoc doc;
    if (perThread.doc == null)
    {
        doc = perThread.doc = perThread.storedFieldsWriter.GetPerDoc();
        doc.docID = docState.docID;
        perThread.localFieldsWriter.SetFieldsStream(doc.fdt);
        System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
        System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
        System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
    }
    else
    {
        doc = perThread.doc;
        System.Diagnostics.Debug.Assert(doc.docID == docState.docID, "doc.docID=" + doc.docID + " docState.docID=" + docState.docID);
    }

    for (int i = 0; i < count; i++)
    {
        Fieldable field = fields[i];
        if (field.IsStored())
        {
            perThread.localFieldsWriter.WriteField(fieldInfo, field);
            System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerField.processFields.writeField"));
            doc.numStoredFields++;
        }
    }
}
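// Illustrative sketch, not part of this class: how count can exceed 1. The
// per-field consumer sees all instances of one field name in a document at
// once, so a document that adds the same field name twice produces a single
// ProcessFields call with count = 2 for that field. The Document/Field usage
// below is the public Lucene.Net API; the values are invented for the example.
using Lucene.Net.Documents;

static class DuplicateFieldExample
{
    internal static Document BuildDoc()
    {
        var doc = new Document();
        doc.Add(new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED));
        // Two instances of the same field name:
        //   "title"  -> fields = { title },            count = 1 (the typical case)
        //   "author" -> fields = { author1, author2 }, count = 2
        doc.Add(new Field("author", "Gospodnetic", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("author", "Hatcher", Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }
}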
internal override void NewTerm(RawPostingList p0)
{
    // First time we're seeing this term since the last
    // flush
    System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));
    FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
    p.lastDocID = docState.docID;
    if (omitTermFreqAndPositions)
    {
        p.lastDocCode = docState.docID;
    }
    else
    {
        p.lastDocCode = docState.docID << 1;
        p.docFreq = 1;
        WriteProx(p, fieldState.position);
    }
}
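// Illustrative sketch (hypothetical helper names, not this class's code): why
// lastDocCode is docID << 1 when term frequencies are kept. When the postings
// are later flushed, each entry in the freq stream is a VInt doc delta shifted
// left by one, with the low bit flagging "freq == 1" so the common case needs
// no separate freq VInt; with omitTermFreqAndPositions the raw delta is
// written instead.
using System.IO;

static class DocCodeSketch
{
    internal static void WriteDocAndFreq(BinaryWriter freqOut, int docDelta, int freq)
    {
        if (freq == 1)
        {
            WriteVInt(freqOut, (docDelta << 1) | 1);  // low bit set: freq is implicitly 1
        }
        else
        {
            WriteVInt(freqOut, docDelta << 1);        // low bit clear: explicit freq follows
            WriteVInt(freqOut, freq);
        }
    }

    internal static void WriteVInt(BinaryWriter w, int value)
    {
        // 7 bits per byte, high bit marks that more bytes follow.
        uint v = (uint) value;
        while (v >= 0x80)
        {
            w.Write((byte) ((v & 0x7F) | 0x80));
            v >>= 7;
        }
        w.Write((byte) v);
    }
}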
public void AddField(IFieldable field, FieldInfo fieldInfo)
{
    if (doc == null)
    {
        doc = storedFieldsWriter.GetPerDoc();
        doc.docID = docState.docID;
        localFieldsWriter.SetFieldsStream(doc.fdt);
        System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
        System.Diagnostics.Debug.Assert(0 == doc.fdt.Length);
        System.Diagnostics.Debug.Assert(0 == doc.fdt.FilePointer);
    }

    localFieldsWriter.WriteField(fieldInfo, field);
    System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
    doc.numStoredFields++;
}
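// Illustrative sketch (hypothetical helper, not part of Lucene.Net): the
// buffer-then-append pattern AddField relies on. Each document's stored
// fields are written into a per-document in-memory buffer (doc.fdt above);
// only when the document completes is that buffer appended to the shared
// fields data file, so an aborted document leaves no partial bytes behind.
using System.IO;

static class PerDocBufferSketch
{
    internal static void AppendCompletedDoc(MemoryStream perDocBuffer, Stream fieldsDataOut)
    {
        perDocBuffer.Position = 0;
        perDocBuffer.CopyTo(fieldsDataOut);  // flush the whole doc in one contiguous write
        perDocBuffer.SetLength(0);           // recycle the buffer for the next document
    }
}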
/// <summary>Called once per field per document if term vectors
/// are enabled, to write the vectors to
/// RAMOutputStream, which is then quickly flushed to
/// the real term vectors files in the Directory.
/// </summary>
internal override void Finish()
{
    System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));

    int numPostings = termsHashPerField.numPostings;
    System.Diagnostics.Debug.Assert(numPostings >= 0);

    if (!doVectors || numPostings == 0)
    {
        return;
    }

    if (numPostings > maxNumPostings)
    {
        maxNumPostings = numPostings;
    }

    IndexOutput tvf = perThread.doc.tvf;

    // This is called once, after inverting all occurrences
    // of a given field in the doc. At this point we flush
    // our hash into the DocWriter.
    System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
    System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo));

    perThread.doc.AddField(termsHashPerField.fieldInfo.number);

    RawPostingList[] postings = termsHashPerField.SortPostings();

    tvf.WriteVInt(numPostings);
    byte bits = (byte) 0x0;
    if (doVectorPositions)
    {
        bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
    }
    if (doVectorOffsets)
    {
        bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
    }
    tvf.WriteByte(bits);

    int encoderUpto = 0;
    int lastTermBytesCount = 0;

    ByteSliceReader reader = perThread.vectorSliceReader;
    char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;

    for (int j = 0; j < numPostings; j++)
    {
        TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList) postings[j];
        int freq = posting.freq;

        char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
        int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

        // We swap between two encoders to save copying
        // last Term's byte array
        UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];

        // TODO: we could do this incrementally
        UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
        int termBytesCount = utf8Result.length;

        // TODO: UTF16toUTF8 could tell us this prefix
        // Compute common prefix between last term and
        // this term
        int prefix = 0;
        if (j > 0)
        {
            byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result;
            byte[] termBytes = perThread.utf8Results[encoderUpto].result;
            while (prefix < lastTermBytesCount && prefix < termBytesCount)
            {
                if (lastTermBytes[prefix] != termBytes[prefix])
                {
                    break;
                }
                prefix++;
            }
        }
        encoderUpto = 1 - encoderUpto;
        lastTermBytesCount = termBytesCount;

        int suffix = termBytesCount - prefix;
        tvf.WriteVInt(prefix);
        tvf.WriteVInt(suffix);
        tvf.WriteBytes(utf8Result.result, prefix, suffix);
        tvf.WriteVInt(freq);

        if (doVectorPositions)
        {
            termsHashPerField.InitReader(reader, posting, 0);
            reader.WriteTo(tvf);
        }

        if (doVectorOffsets)
        {
            termsHashPerField.InitReader(reader, posting, 1);
            reader.WriteTo(tvf);
        }
    }

    termsHashPerField.Reset();
    perThread.termsHashPerThread.Reset(false);
}
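// Illustrative sketch (simplified, with assumed names): how the prefix/suffix
// encoding written above is undone. Each term in the tvf stream is a VInt
// shared-prefix length, a VInt suffix length, and the suffix bytes; a reader
// rebuilds the full UTF-8 term by copying the prefix from the previously
// decoded term and appending the new suffix.
using System;

static class PrefixCodedTermSketch
{
    internal static byte[] DecodeNextTerm(byte[] previousTerm, int prefix, byte[] suffix)
    {
        var term = new byte[prefix + suffix.Length];
        Array.Copy(previousTerm, 0, term, 0, prefix);        // shared prefix from the last term
        Array.Copy(suffix, 0, term, prefix, suffix.Length);  // new suffix bytes read from tvf
        return term;
    }
}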