/// <summary>
/// Puts <paramref name="perDoc"/> back onto <c>docFreeList</c> at the next
/// unused slot and advances <c>freeCount</c>.
/// </summary>
/// <param name="perDoc">The per-document instance being returned for reuse.</param>
internal void FreePerDoc(PerDoc perDoc)
{
    lock (this)
    {
        // The free list must still have room for one more entry.
        System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);

        // Claim the slot first (post-increment), then store into it.
        int slot = freeCount++;
        docFreeList[slot] = perDoc;
    }
}
/// <summary>
/// Returns <paramref name="perDoc"/> to <c>docFreeList</c>. In debug builds this
/// verifies that the instance carries no stored fields and that its
/// <c>fdt</c> buffer has zero length and a zero file pointer.
/// </summary>
/// <param name="perDoc">The per-document instance being returned for reuse.</param>
internal void Free(PerDoc perDoc)
{
    lock (this)
    {
        // There must be a spare slot in the free list.
        System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);

        // The instance is expected to be empty before it is recycled.
        System.Diagnostics.Debug.Assert(perDoc.numStoredFields == 0);
        System.Diagnostics.Debug.Assert(perDoc.fdt.Length() == 0);
        System.Diagnostics.Debug.Assert(perDoc.fdt.GetFilePointer() == 0);

        int slot = freeCount++;
        docFreeList[slot] = perDoc;
    }
}
/// <summary>
/// Completes a document's stored fields: hands the buffered fields in
/// <paramref name="perDoc"/> to <c>fieldsWriter</c>, advances <c>lastDocID</c>,
/// then resets the instance and returns it to the free list.
/// </summary>
/// <param name="perDoc">Per-document buffer holding the stored fields to flush.</param>
internal void FinishDocument(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(
            docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument start"));

        InitFieldsWriter();

        // NOTE(review): Fill presumably brings the output up to perDoc.docID
        // before this document is appended — confirm against Fill.
        Fill(perDoc.docID);

        // Append stored fields to the real FieldsWriter:
        fieldsWriter.FlushDocument(perDoc.numStoredFields, perDoc.fdt);
        ++lastDocID;

        // Recycle the buffer: it must be reset before Free, which asserts it is empty.
        perDoc.Reset();
        Free(perDoc);

        System.Diagnostics.Debug.Assert(
            docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument end"));
    }
}
/// <summary>
/// Completes a document's term vectors: records the current <c>tvd</c>/<c>tvf</c>
/// positions in <c>tvx</c>, writes the field count, field numbers and
/// delta-encoded field pointers to <c>tvd</c>, copies the buffered vector data
/// to <c>tvf</c>, then resets <paramref name="perDoc"/> and frees it.
/// </summary>
/// <param name="perDoc">Per-document buffer holding the term vectors to flush.</param>
internal void FinishDocument(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(
            docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

        InitTermVectorsWriter();

        // NOTE(review): Fill presumably brings the outputs up to perDoc.docID
        // before this document is appended — confirm against Fill.
        Fill(perDoc.docID);

        // Append term vectors to the real outputs:
        tvx.WriteLong(tvd.FilePointer);
        tvx.WriteLong(tvf.FilePointer);
        tvd.WriteVInt(perDoc.numVectorFields);

        if (perDoc.numVectorFields > 0)
        {
            // Field numbers, one VInt per vector field.
            int fieldIdx = 0;
            while (fieldIdx < perDoc.numVectorFields)
            {
                tvd.WriteVInt(perDoc.fieldNumbers[fieldIdx]);
                fieldIdx++;
            }

            // The first pointer is asserted to be zero and is not written;
            // each later pointer is written as a delta from its predecessor.
            System.Diagnostics.Debug.Assert(perDoc.fieldPointers[0] == 0);
            long previous = perDoc.fieldPointers[0];
            for (int k = 1; k < perDoc.numVectorFields; k++)
            {
                long current = perDoc.fieldPointers[k];
                tvd.WriteVLong(current - previous);
                previous = current;
            }

            perDoc.perDocTvf.WriteTo(tvf);
            perDoc.numVectorFields = 0;
        }

        System.Diagnostics.Debug.Assert(
            perDoc.docID + docWriter.DocStoreOffset == lastDocID);

        ++lastDocID;

        perDoc.Reset();
        Free(perDoc);

        System.Diagnostics.Debug.Assert(
            docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
    }
}
/// <summary>
/// Allocates <c>docFreeList</c> with room for a single <c>PerDoc</c> entry.
/// </summary>
private void InitBlock()
{
    // Starting capacity of the free list.
    // NOTE(review): presumably grown elsewhere as more PerDoc instances are created — confirm.
    const int initialCapacity = 1;
    docFreeList = new PerDoc[initialCapacity];
}
/// <summary>
/// Completes a document's term vectors: records the current <c>tvd</c>/<c>tvf</c>
/// positions in <c>tvx</c>, writes the field count, field numbers and
/// delta-encoded field pointers to <c>tvd</c>, copies the buffered vector data
/// to <c>tvf</c>, then resets <paramref name="perDoc"/> and frees it.
/// </summary>
/// <param name="perDoc">Per-document buffer holding the term vectors to flush.</param>
internal void FinishDocument(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(
            docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));

        InitTermVectorsWriter();

        // NOTE(review): Fill presumably brings the outputs up to perDoc.docID
        // before this document is appended — confirm against Fill.
        Fill(perDoc.docID);

        // Append term vectors to the real outputs:
        tvx.WriteLong(tvd.GetFilePointer());
        tvx.WriteLong(tvf.GetFilePointer());
        tvd.WriteVInt(perDoc.numVectorFields);

        if (perDoc.numVectorFields > 0)
        {
            // Field numbers, one VInt per vector field.
            for (int fieldIdx = 0; fieldIdx < perDoc.numVectorFields; fieldIdx++)
            {
                tvd.WriteVInt(perDoc.fieldNumbers[fieldIdx]);
            }

            // The first pointer is asserted to be zero and is not written;
            // each later pointer is written as a delta from its predecessor.
            System.Diagnostics.Debug.Assert(perDoc.fieldPointers[0] == 0);
            long previous = perDoc.fieldPointers[0];
            int k = 1;
            while (k < perDoc.numVectorFields)
            {
                long current = perDoc.fieldPointers[k];
                tvd.WriteVLong(current - previous);
                previous = current;
                k++;
            }

            perDoc.perDocTvf.WriteTo(tvf);
            perDoc.numVectorFields = 0;
        }

        System.Diagnostics.Debug.Assert(
            perDoc.docID + docWriter.GetDocStoreOffset() == lastDocID);

        ++lastDocID;

        perDoc.Reset();
        Free(perDoc);

        System.Diagnostics.Debug.Assert(
            docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
    }
}
/// <summary>
/// Returns <paramref name="perDoc"/> to <c>docFreeList</c>. In debug builds this
/// verifies that the instance carries no stored fields and that its
/// <c>fdt</c> buffer has zero length and a zero file pointer.
/// </summary>
/// <param name="perDoc">The per-document instance being returned for reuse.</param>
internal void Free(PerDoc perDoc)
{
    lock (this)
    {
        // There must be a spare slot in the free list.
        System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);

        // The instance is expected to be empty before it is recycled.
        System.Diagnostics.Debug.Assert(perDoc.numStoredFields == 0);
        System.Diagnostics.Debug.Assert(perDoc.fdt.Length == 0);
        System.Diagnostics.Debug.Assert(perDoc.fdt.FilePointer == 0);

        int slot = freeCount++;
        docFreeList[slot] = perDoc;
    }
}
/// <summary>
/// Processes the current document in <c>docState</c>: groups its fields by name
/// (registering or updating the matching <c>FieldInfo</c>), forwards stored
/// fields to <c>fieldsWriter</c>, sorts the distinct fields, and passes each
/// group to its per-field consumer.
/// </summary>
/// <returns>
/// The writer returned by <c>consumer.FinishDocument()</c> when the fields writer
/// returned null, the fields writer's result when the consumer returned null,
/// otherwise a <c>PerDoc</c> holding both.
/// </returns>
public override DocumentsWriter.DocWriter ProcessDocument()
{
    consumer.StartDocument();
    fieldsWriter.StartDocument();

    Document document = docState.doc;

    System.Diagnostics.Debug.Assert(
        docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

    fieldCount = 0;

    // Generation stamp used to detect the first occurrence of a field within this document.
    int currentGen = fieldGen++;

    System.Collections.Generic.IList<IFieldable> fieldList = document.GetFields();
    int total = fieldList.Count;

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):
    for (int n = 0; n < total; n++)
    {
        IFieldable docField = fieldList[n];
        string name = docField.Name;

        // Make sure we have a PerField allocated
        int bucket = name.GetHashCode() & hashMask;
        DocFieldProcessorPerField perField;
        for (perField = fieldHash[bucket]; perField != null; perField = perField.next)
        {
            if (perField.fieldInfo.name.Equals(name))
            {
                break;
            }
        }

        if (perField != null)
        {
            // Known field: merge this occurrence's flags into the existing FieldInfo.
            perField.fieldInfo.Update(
                docField.IsIndexed,
                docField.IsTermVectorStored,
                docField.IsStorePositionWithTermVector,
                docField.IsStoreOffsetWithTermVector,
                docField.OmitNorms,
                false,
                docField.OmitTermFreqAndPositions);
        }
        else
        {
            // TODO FI: we need to genericize the "flags" that a
            // field holds, and, how these flags are merged; it
            // needs to be more "pluggable" such that if I want
            // to have a new "thing" my Fields can do, I can
            // easily add it
            FieldInfo info = fieldInfos.Add(
                name,
                docField.IsIndexed,
                docField.IsTermVectorStored,
                docField.IsStorePositionWithTermVector,
                docField.IsStoreOffsetWithTermVector,
                docField.OmitNorms,
                false,
                docField.OmitTermFreqAndPositions);

            // Insert the new entry at the head of its hash chain.
            perField = new DocFieldProcessorPerField(this, info);
            perField.next = fieldHash[bucket];
            fieldHash[bucket] = perField;
            totalFieldCount++;

            // Rehash once the table is at least half full.
            if (totalFieldCount >= fieldHash.Length / 2)
            {
                Rehash();
            }
        }

        if (perField.lastGen != currentGen)
        {
            // First time we're seeing this field for this doc
            perField.fieldCount = 0;

            if (fieldCount == fields.Length)
            {
                // Double the per-document field array when it is full.
                DocFieldProcessorPerField[] grown = new DocFieldProcessorPerField[fields.Length * 2];
                Array.Copy(fields, 0, grown, 0, fieldCount);
                fields = grown;
            }

            fields[fieldCount++] = perField;
            perField.lastGen = currentGen;
        }

        if (perField.fieldCount == perField.fields.Length)
        {
            // Double this field's occurrence array when it is full.
            IFieldable[] grownOccurrences = new IFieldable[perField.fields.Length * 2];
            Array.Copy(perField.fields, 0, grownOccurrences, 0, perField.fieldCount);
            perField.fields = grownOccurrences;
        }

        perField.fields[perField.fieldCount++] = docField;

        if (docField.IsStored)
        {
            fieldsWriter.AddField(docField, perField.fieldInfo);
        }
    }

    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order.  TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    QuickSort(fields, 0, fieldCount - 1);

    for (int n = 0; n < fieldCount; n++)
    {
        DocFieldProcessorPerField sorted = fields[n];
        sorted.consumer.ProcessFields(sorted.fields, sorted.fieldCount);
    }

    if (docState.maxTermPrefix != null && docState.infoStream != null)
    {
        docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
        docState.maxTermPrefix = null;
    }

    // Both FinishDocument calls always run, fields writer first.
    DocumentsWriter.DocWriter fieldsResult = fieldsWriter.FinishDocument();
    DocumentsWriter.DocWriter consumerResult = consumer.FinishDocument();

    if (fieldsResult == null)
    {
        return consumerResult;
    }

    if (consumerResult == null)
    {
        return fieldsResult;
    }

    // Both produced a writer: bundle them into one PerDoc for this document.
    PerDoc combined = GetPerDoc();
    combined.docID = docState.docID;
    System.Diagnostics.Debug.Assert(fieldsResult.docID == docState.docID);
    System.Diagnostics.Debug.Assert(consumerResult.docID == docState.docID);
    combined.one = fieldsResult;
    combined.two = consumerResult;
    return combined;
}