/// <summary>
/// Initializes shared state for this new document: resets the per-document
/// counters, registers every field of <paramref name="doc"/> with the enclosing
/// writer's field infos, makes sure a FieldData exists for each field, and
/// creates the stored-fields / term-vector outputs the first time they are needed.
/// </summary>
/// <param name="doc">Document whose fields are about to be processed.</param>
/// <param name="docID">ID recorded for this document in <c>this.docID</c>.</param>
/// <exception cref="AbortException">
/// Thrown when creating the shared fields writer or the term vector outputs
/// fails; the original exception is wrapped.
/// </exception>
internal void Init(Document doc, int docID)
{
    System.Diagnostics.Debug.Assert(!isIdle);
    System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

    // Reset per-document state.
    this.docID = docID;
    docBoost = doc.GetBoost();
    numStoredFields = 0;
    numFieldData = 0;
    numVectorFields = 0;
    maxTermPrefix = null;

    // The local buffers must be empty when a new document starts.
    System.Diagnostics.Debug.Assert(0 == fdtLocal.Length());
    System.Diagnostics.Debug.Assert(0 == fdtLocal.GetFilePointer());
    System.Diagnostics.Debug.Assert(0 == tvfLocal.Length());
    System.Diagnostics.Debug.Assert(0 == tvfLocal.GetFilePointer());

    // Generation stamp used below to detect the first occurrence of a
    // field within this document (compared against FieldData.lastGen).
    int thisFieldGen = fieldGen++;

    System.Collections.IList docFields = doc.GetFields();
    int numDocFields = docFields.Count;
    bool docHasVectors = false;

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):
    for (int i = 0; i < numDocFields; i++)
    {
        Fieldable field = (Fieldable) docFields[i];

        FieldInfo fi = Enclosing_Instance.fieldInfos.Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false);

        if (fi.isIndexed && !fi.omitNorms)
        {
            // Maybe grow our buffered norms
            if (Enclosing_Instance.norms.Length <= fi.number)
            {
                int newSize = (int) ((1 + fi.number) * 1.25);
                BufferedNorms[] newNorms = new BufferedNorms[newSize];
                Array.Copy(Enclosing_Instance.norms, 0, newNorms, 0, Enclosing_Instance.norms.Length);
                Enclosing_Instance.norms = newNorms;
            }

            if (Enclosing_Instance.norms[fi.number] == null)
                Enclosing_Instance.norms[fi.number] = new BufferedNorms();

            Enclosing_Instance.hasNorms = true;
        }

        // Make sure we have a FieldData allocated: look the field name up
        // in the chained hash table.
        int hashPos = fi.name.GetHashCode() & fieldDataHashMask;
        FieldData fp = fieldDataHash[hashPos];
        while (fp != null && !fp.fieldInfo.name.Equals(fi.name))
            fp = fp.next;

        if (fp == null)
        {
            // Not seen by this thread state yet: create it and push it on
            // the front of its bucket chain.
            fp = new FieldData(this, fi);
            fp.next = fieldDataHash[hashPos];
            fieldDataHash[hashPos] = fp;

            if (numAllFieldData == allFieldDataArray.Length)
            {
                int newSize = (int) (allFieldDataArray.Length * 1.5);
                int newHashSize = fieldDataHash.Length * 2;

                FieldData[] newArray = new FieldData[newSize];
                FieldData[] newHashArray = new FieldData[newHashSize];
                Array.Copy(allFieldDataArray, 0, newArray, 0, numAllFieldData);

                // Rehash. The mask has to come from the size of the hash
                // table itself, not from the (1.5x grown) size of
                // allFieldDataArray: "newSize - 1" is in general not of the
                // form 2^k - 1, which leaves buckets permanently unused.
                // NOTE(review): assumes fieldDataHash.Length is a power of
                // two -- confirm where the table is first allocated.
                fieldDataHashMask = newHashSize - 1;
                for (int j = 0; j < fieldDataHash.Length; j++)
                {
                    FieldData fp0 = fieldDataHash[j];
                    while (fp0 != null)
                    {
                        hashPos = fp0.fieldInfo.name.GetHashCode() & fieldDataHashMask;
                        FieldData nextFP0 = fp0.next;
                        fp0.next = newHashArray[hashPos];
                        newHashArray[hashPos] = fp0;
                        fp0 = nextFP0;
                    }
                }

                allFieldDataArray = newArray;
                fieldDataHash = newHashArray;
            }
            allFieldDataArray[numAllFieldData++] = fp;
        }
        else
        {
            System.Diagnostics.Debug.Assert(fp.fieldInfo == fi);
        }

        if (thisFieldGen != fp.lastGen)
        {
            // First time we're seeing this field for this doc
            fp.lastGen = thisFieldGen;
            fp.fieldCount = 0;
            fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
            fp.doNorms = fi.isIndexed && !fi.omitNorms;

            if (numFieldData == fieldDataArray.Length)
            {
                int newSize = fieldDataArray.Length * 2;
                FieldData[] newArray = new FieldData[newSize];
                Array.Copy(fieldDataArray, 0, newArray, 0, numFieldData);
                fieldDataArray = newArray;
            }
            fieldDataArray[numFieldData++] = fp;
        }

        if (field.IsTermVectorStored())
        {
            // Count each vector field once per document; the arrays are
            // replaced (not copied) when the count reaches their length.
            if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.Length)
            {
                int newSize = (int) (numVectorFields * 1.5);
                vectorFieldPointers = new long[newSize];
                vectorFieldNumbers = new int[newSize];
            }
            fp.doVectors = true;
            docHasVectors = true;

            fp.doVectorPositions |= field.IsStorePositionWithTermVector();
            fp.doVectorOffsets |= field.IsStoreOffsetWithTermVector();
        }

        // Make room for one more occurrence of this field in the document.
        if (fp.fieldCount == fp.docFields.Length)
        {
            Fieldable[] newArray = new Fieldable[fp.docFields.Length * 2];
            Array.Copy(fp.docFields, 0, newArray, 0, fp.docFields.Length);
            fp.docFields = newArray;
        }

        // Lazily allocate arrays for postings:
        if (field.IsIndexed() && fp.postingsHash == null)
            fp.InitPostingArrays();

        fp.docFields[fp.fieldCount++] = field;
    }

    // Maybe init the local & global fieldsWriter
    if (localFieldsWriter == null)
    {
        if (Enclosing_Instance.fieldsWriter == null)
        {
            System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment == null);
            System.Diagnostics.Debug.Assert(Enclosing_Instance.segment != null);
            Enclosing_Instance.docStoreSegment = Enclosing_Instance.segment;

            // If we hit an exception while init'ing the
            // fieldsWriter, we must abort this segment
            // because those files will be in an unknown
            // state:
            try
            {
                Enclosing_Instance.fieldsWriter = new FieldsWriter(Enclosing_Instance.directory, Enclosing_Instance.docStoreSegment, Enclosing_Instance.fieldInfos);
            }
            catch (System.Exception t)
            {
                throw new AbortException(t, Enclosing_Instance);
            }
            Enclosing_Instance.files = null;
        }
        localFieldsWriter = new FieldsWriter(null, fdtLocal, Enclosing_Instance.fieldInfos);
    }

    // First time we see a doc that has field(s) with
    // stored vectors, we init our tvx writer
    if (docHasVectors)
    {
        if (Enclosing_Instance.tvx == null)
        {
            System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment != null);

            // If we hit an exception while init'ing the term
            // vector output files, we must abort this segment
            // because those files will be in an unknown
            // state:
            try
            {
                Enclosing_Instance.tvx = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
                Enclosing_Instance.tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
                Enclosing_Instance.tvd = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
                Enclosing_Instance.tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
                Enclosing_Instance.tvf = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
                Enclosing_Instance.tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);

                // We must "catch up" for all docs before us
                // that had no vectors:
                for (int i = 0; i < Enclosing_Instance.numDocsInStore; i++)
                {
                    Enclosing_Instance.tvx.WriteLong(Enclosing_Instance.tvd.GetFilePointer());
                    Enclosing_Instance.tvd.WriteVInt(0);
                }
            }
            catch (System.Exception t)
            {
                throw new AbortException(t, Enclosing_Instance);
            }
            Enclosing_Instance.files = null;
        }

        // The counter was only needed above to size the vector arrays;
        // start the document's vector field count from zero again.
        numVectorFields = 0;
    }
}
/// <summary>
/// Assigns the starting values of the instance fields: empty thread-state and
/// norms arrays, the waiting-thread-state slots, and the buffering limits taken
/// from the IndexWriter defaults.
/// </summary>
private void InitBlock()
{
    // Buffering limits, taken from the IndexWriter defaults.
    maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
    maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

    // The default is expressed in megabytes; the field holds bytes.
    // Porting note ({{Aroush-2.3.1}}): the original port left an unfinished
    // remark questioning 'ramBufferSize' here -- still unresolved.
    ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024);

    // Thread-state bookkeeping: no active states yet, and a fixed number of
    // slots for waiting ones.
    threadStates = new ThreadState[0];
    waitingThreadStates = new ThreadState[MAX_THREAD_STATE];

    // Buffered norms start empty.
    norms = new BufferedNorms[0];
}