/// <summary>
/// Splits the per-thread/per-field map into two parallel maps -- one keyed by
/// each child's "one" half, one by its "two" half -- then flushes both
/// child consumers with their respective maps.
/// </summary>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    System.Collections.IDictionary oneThreadsAndFields = new System.Collections.Hashtable();
    System.Collections.IDictionary twoThreadsAndFields = new System.Collections.Hashtable();

    // Enumerate a snapshot copy so the caller's map is never iterated directly.
    foreach (System.Collections.DictionaryEntry entry in new System.Collections.Hashtable(threadsAndFields))
    {
        DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;

        System.Collections.Hashtable oneFields = new System.Collections.Hashtable();
        System.Collections.Hashtable twoFields = new System.Collections.Hashtable();

        // The value is the collection of per-field instances this thread saw;
        // split each one into its two child halves.
        foreach (object fieldObj in (System.Collections.ICollection) entry.Value)
        {
            DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldObj;
            SupportClass.CollectionsHelper.AddIfNotContains(oneFields, perField.one);
            SupportClass.CollectionsHelper.AddIfNotContains(twoFields, perField.two);
        }

        oneThreadsAndFields[perThread.one] = oneFields;
        twoThreadsAndFields[perThread.two] = twoFields;
    }

    one.Flush(oneThreadsAndFields, state);
    two.Flush(twoThreadsAndFields, state);
}
/// <summary>
/// Remaps the per-thread/per-field state onto the downstream consumer and
/// end-consumer chains, then flushes both.
/// </summary>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
    System.Collections.IDictionary endChildThreadsAndFields = new System.Collections.Hashtable();

    // Enumerate a snapshot copy so the caller's map is never iterated directly.
    foreach (System.Collections.DictionaryEntry entry in new System.Collections.Hashtable(threadsAndFields))
    {
        DocInverterPerThread perThread = (DocInverterPerThread) entry.Key;

        System.Collections.Hashtable childFields = new System.Collections.Hashtable();
        System.Collections.Hashtable endChildFields = new System.Collections.Hashtable();

        // The per-thread value is a Hashtable used as a set (key == value);
        // project each per-field onto its two downstream consumers.
        foreach (System.Collections.DictionaryEntry fieldEntry in (System.Collections.ICollection) entry.Value)
        {
            DocInverterPerField perField = (DocInverterPerField) fieldEntry.Key;
            childFields[perField.consumer] = perField.consumer;
            endChildFields[perField.endConsumer] = perField.endConsumer;
        }

        childThreadsAndFields[perThread.consumer] = childFields;
        endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
    }

    consumer.Flush(childThreadsAndFields, state);
    endConsumer.Flush(endChildThreadsAndFields, state);
}
/// <summary>
/// Creates the .frq output for the current segment, registers it in the
/// flush state, and wires up skip-list and positions writers.
/// </summary>
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent) : base()
{
    this.parent = parent;
    FormatPostingsFieldsWriter fieldsWriter = parent.parent;

    // Frequency (.frq) file for this segment.
    System.String freqFileName = IndexFileNames.SegmentFileName(fieldsWriter.segment, IndexFileNames.FREQ_EXTENSION);
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, freqFileName);
    out_Renamed = fieldsWriter.dir.CreateOutput(freqFileName);
    totalNumDocs = fieldsWriter.totalNumDocs;

    // TODO: abstraction violation
    skipInterval = fieldsWriter.termsOut.skipInterval;
    skipListWriter = fieldsWriter.skipListWriter;
    skipListWriter.SetFreqOutput(out_Renamed);

    posWriter = new FormatPostingsPositionsWriter(state, this);
}
/// <summary>
/// Creates the .prx output for the current segment when at least one field
/// stores positions; otherwise leaves the output null (no prox file written).
/// </summary>
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
{
    this.parent = parent;
    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;

    FormatPostingsFieldsWriter fieldsWriter = parent.parent.parent;
    if (!fieldsWriter.fieldInfos.HasProx())
    {
        // Every field omits TF so we will write no prox file.
        out_Renamed = null;
    }
    else
    {
        // At least one field does not omit TF, so create the prox file.
        System.String proxFileName = IndexFileNames.SegmentFileName(fieldsWriter.segment, IndexFileNames.PROX_EXTENSION);
        SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, proxFileName);
        out_Renamed = fieldsWriter.dir.CreateOutput(proxFileName);
        parent.skipListWriter.SetProxOutput(out_Renamed);
    }
}
/// <summary>
/// Root of the postings writer chain for one segment flush.  Captures the
/// per-flush parameters, opens the terms dictionary writer, and registers the
/// .tis/.tii files with the flush state.  Note the order is significant:
/// termsOut must exist before the skip-list writer reads its parameters.
/// </summary>
public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos) : base()
{
    dir = state.directory;
    segment = state.segmentName;
    totalNumDocs = state.numDocs;
    this.fieldInfos = fieldInfos;
    termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);

    // TODO: this is a nasty abstraction violation (that we
    // peek down to find freqOut/proxOut) -- we need a
    // better abstraction here whereby these child consumers
    // can provide skip data or not
    skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);

    // Record the terms dictionary and terms index files as part of this flush.
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));

    termsWriter = new FormatPostingsTermsWriter(state, this);
}
/// <summary>
/// Flushes buffered term-vector data: pads the index to cover skipped docs,
/// flushes the three vector outputs, then resets every per-field and
/// per-thread state so buffers are reclaimed between flushes.
/// </summary>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        // NOTE: it's possible that all documents seen in this segment
        // hit non-aborting exceptions, in which case we will
        // not have yet init'd the TermVectorsWriter.  This is
        // actually OK (unlike in the stored fields case)
        // because, although FieldInfos.hasVectors() will return
        // true, the TermVectorsReader gracefully handles
        // non-existence of the term vectors files.
        if (tvx != null)
        {
            if (state.numDocsInStore > 0)
                // In case there are some final documents that we
                // didn't see (because they hit a non-aborting exception):
                Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());

            tvx.Flush();
            tvd.Flush();
            tvf.Flush();
        }

        // Reset all per-field then per-thread states (iterate a snapshot copy).
        System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
        while (it.MoveNext())
        {
            System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
            System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator();
            while (it2.MoveNext())
            {
                TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key;
                perField.termsHashPerField.Reset();
                perField.ShrinkHash();
            }

            TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
            perThread.termsHashPerThread.Reset(true);
        }
    }
}
/// <summary>
/// Flushes buffered stored fields: pads the fdx index to cover any docs we
/// never saw, then flushes the underlying fields writer if it exists.
/// </summary>
public void Flush(SegmentWriteState state)
{
    lock (this)
    {
        bool haveStoredDocs = state.numDocsInStore > 0;
        if (haveStoredDocs)
        {
            // It's possible that all documents seen in this segment
            // hit non-aborting exceptions, in which case we will
            // not have yet init'd the FieldsWriter:
            InitFieldsWriter();

            // Fill fdx file to include any final docs that we
            // skipped because they hit non-aborting exceptions
            Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
        }

        if (fieldsWriter != null)
        {
            fieldsWriter.Flush();
        }
    }
}
/// <summary>
/// Flushes the field processor: collects each thread's fields for the child
/// consumer, trims stale fields, flushes stored fields and the consumer, then
/// persists FieldInfos (after the consumer flush, which may mutate them).
/// </summary>
public override void Flush(System.Collections.ICollection threads, SegmentWriteState state)
{
    System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();

    foreach (System.Collections.DictionaryEntry threadEntry in threads)
    {
        DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) threadEntry.Key;
        childThreadsAndFields[perThread.consumer] = perThread.Fields();
        perThread.TrimFields(state);
    }

    fieldsWriter.Flush(state);
    consumer.Flush(childThreadsAndFields, state);

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary.  EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, fileName);
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
}
/// <summary>Close doc stores.  Implementations release whatever
/// doc-store outputs they hold for the given flush state.
/// </summary>
internal abstract void CloseDocStore(SegmentWriteState state);
/// <summary>Flush a new segment.  <paramref name="threadsAndFields"/> maps each
/// per-thread instance to the collection of per-field instances it touched
/// during this segment.
/// </summary>
internal abstract void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state);
/// <summary>Produce _X.nrm if any document had a field with norms
/// not disabled
/// </summary>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    System.Collections.IDictionary byField = new System.Collections.Hashtable();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
    while (it.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;

        System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value;
        System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();
        System.Collections.ArrayList fieldsToRemove = new System.Collections.ArrayList();

        while (fieldsIt.MoveNext())
        {
            NormsWriterPerField perField = (NormsWriterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key;

            if (perField.upto > 0)
            {
                // It has some norms
                System.Collections.IList l = (System.Collections.IList) byField[perField.fieldInfo];
                if (l == null)
                {
                    l = new System.Collections.ArrayList();
                    byField[perField.fieldInfo] = l;
                }
                l.Add(perField);
            }
            // Remove this field since we haven't seen it
            // since the previous flush
            else
            {
                fieldsToRemove.Add(perField);
            }
        }

        // Removal is deferred: we cannot remove from "fields" while enumerating it.
        System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable) fields;
        for (int i = 0; i < fieldsToRemove.Count; i++)
        {
            fieldsHT.Remove(fieldsToRemove[i]);
        }
    }

    System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles[normsFileName] = normsFileName;
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();

        int normCount = 0;

        // Write one norms run per indexed field, merging the per-thread
        // docID-sorted runs by repeatedly taking the smallest next docID.
        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

            System.Collections.IList toMerge = (System.Collections.IList) byField[fieldInfo];
            int upto = 0;
            if (toMerge != null)
            {
                int numFields = toMerge.Count;

                normCount++;

                NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                int[] uptos = new int[numFields];

                for (int j = 0; j < numFields; j++)
                    fields[j] = (NormsWriterPerField) toMerge[j];

                int numLeft = numFields;

                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                    int minLoc = 0;
                    int minDocID = fields[0].docIDs[uptos[0]];

                    // Linear scan for the per-thread run holding the smallest next docID.
                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = fields[j].docIDs[uptos[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    for (; upto < minDocID; upto++)
                        normsOut.WriteByte(defaultNorm);

                    normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                    (uptos[minLoc])++;
                    upto++;

                    if (uptos[minLoc] == fields[minLoc].upto)
                    {
                        // This run is exhausted; compact the arrays by moving
                        // the last still-active run into its slot.
                        fields[minLoc].Reset();
                        if (minLoc != numLeft - 1)
                        {
                            fields[minLoc] = fields[numLeft - 1];
                            uptos[minLoc] = uptos[numLeft - 1];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                for (; upto < state.numDocs; upto++)
                    normsOut.WriteByte(defaultNorm);
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                for (; upto < state.numDocs; upto++)
                    normsOut.WriteByte(defaultNorm);
            }

            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
        }
    }
    finally
    {
        normsOut.Close();
    }
}
// Intentionally a no-op: this consumer holds no doc-store outputs to close.
internal override void CloseDocStore(SegmentWriteState state)
{
}
/// <summary>
/// Merges the term dictionaries and postings of all source readers into the
/// target segment via the FormatPostings writer chain.
/// </summary>
private void MergeTerms()
{
    SegmentWriteState flushState = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);

    FormatPostingsFieldsConsumer fieldsConsumer = new FormatPostingsFieldsWriter(flushState, fieldInfos);

    try
    {
        queue = new SegmentMergeQueue(readers.Count);
        MergeTermInfos(fieldsConsumer);
    }
    finally
    {
        // Always finish the consumer and release the merge queue,
        // even when MergeTermInfos throws.
        fieldsConsumer.Finish();
        if (queue != null)
            queue.Close();
    }
}
/// <summary>If there are fields we've seen but did not see again
/// in the last run, then free them up.
/// </summary>
internal void TrimFields(SegmentWriteState state)
{
    // Walk each hash bucket's chain: unlink entries whose lastGen is still -1
    // (not seen since the previous flush), and reset the marker on the rest.
    for (int bucket = 0; bucket < fieldHash.Length; bucket++)
    {
        DocFieldProcessorPerField previous = null;
        DocFieldProcessorPerField current = fieldHash[bucket];

        while (current != null)
        {
            if (current.lastGen != -1)
            {
                // Seen during the last run: reset for the next run.
                current.lastGen = -1;
                previous = current;
            }
            else
            {
                // This field was not seen since the previous flush,
                // so free its resources now: unlink it from the chain.
                if (previous == null)
                    fieldHash[bucket] = current.next;
                else
                    previous.next = current.next;

                if (state.docWriter.infoStream != null)
                    state.docWriter.infoStream.WriteLine("  purge field=" + current.fieldInfo.name);

                totalFieldCount--;
            }

            current = current.next;
        }
    }
}
/// <summary>
/// Closes this hash's consumer doc store, then propagates the close down
/// the chained TermsHash (if any).
/// </summary>
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        consumer.CloseDocStore(state);
        if (nextTermsHash != null)
            nextTermsHash.CloseDocStore(state);
    }
}
/// <summary>
/// Closes the doc stores of both child consumers.  The end consumer's close
/// runs in a finally block so it executes even if the primary consumer's
/// close throws -- consistent with DocFieldConsumers.CloseDocStore.
/// </summary>
public override void CloseDocStore(SegmentWriteState state)
{
    try
    {
        consumer.CloseDocStore(state);
    }
    finally
    {
        endConsumer.CloseDocStore(state);
    }
}
/// <summary>
/// Chooses the segment name (optionally for doc stores only) and snapshots
/// all per-flush parameters into a fresh SegmentWriteState.
/// </summary>
private void InitFlushState(bool onlyDocStore)
{
    lock (this)
    {
        InitSegmentName(onlyDocStore);
        flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.GetTermIndexInterval());
    }
}
/// <summary>
/// Wires this terms writer into its parent fields writer and creates the
/// downstream docs writer.  Assignment order matters: this.parent must be
/// set before FormatPostingsDocsWriter is constructed, since that
/// constructor reaches back through this.parent.
/// </summary>
internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) : base()
{
    this.parent = parent;
    termsOut = parent.termsOut;
    docsWriter = new FormatPostingsDocsWriter(state, this);
}
/// <summary>
/// Closes both child doc stores; the finally block guarantees "two" is
/// closed even if closing "one" throws.
/// </summary>
public override void CloseDocStore(SegmentWriteState state)
{
    try
    {
        one.CloseDocStore(state);
    }
    finally
    {
        two.CloseDocStore(state);
    }
}
/// <summary>
/// Trims the pooled RawPostingList free list down to a single entry after a
/// flush, crediting the reclaimed bytes back to the DocumentsWriter's
/// allocation accounting.  (threadsAndFields/state are currently unused but
/// kept for signature symmetry with Flush.)
/// </summary>
internal void ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    // Bug fix: the assert message previously contained the literal Java
    // snippet "Thread.currentThread().getName()" left over from the port;
    // report the actual thread name instead.
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = 1;
    if (newSize != postingsFreeList.Length)
    {
        if (postingsFreeCount > newSize)
        {
            if (trackAllocations)
            {
                // Return the bytes held by the trimmed entries to the accounting.
                docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
            }
            postingsFreeCount = newSize;
            postingsAllocCount = newSize;
        }

        RawPostingList[] newArray = new RawPostingList[newSize];
        Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
/// <summary>
/// Hooks this terms writer up to its parent fields writer and builds the
/// child docs writer.  Note: this.parent is assigned first because the
/// FormatPostingsDocsWriter constructor dereferences it via "this".
/// </summary>
internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) : base()
{
    this.parent = parent;
    termsOut = parent.termsOut;
    docsWriter = new FormatPostingsDocsWriter(state, this);
}
/// <summary>
/// Closes the term-vector doc-store files (tvx/tvd/tvf) if any document in
/// this run stored term vectors, verifies the tvx size, registers the files
/// as flushed, and removes them from the open-files set.
/// </summary>
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors
            // enabled
            Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
            tvx.Close();
            tvf.Close();
            tvd.Close();
            tvx = null;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
            System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            // Sanity check: tvx must be exactly 4 header bytes + 16 bytes per stored doc.
            if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));

            // Record all three vector files as flushed ...
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            // ... and no longer open.
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
/// <summary>
/// Flushes this hash's consumer with a remapped per-thread/per-field view,
/// shrinks the pooled postings, then flushes the chained TermsHash (if any)
/// with its own remapped view.
/// </summary>
internal override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        bool hasNext = nextTermsHash != null;

        System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
        System.Collections.IDictionary nextThreadsAndFields = hasNext ? new System.Collections.Hashtable() : null;

        // Enumerate a snapshot copy so the caller's map is never iterated directly.
        foreach (System.Collections.DictionaryEntry entry in new System.Collections.Hashtable(threadsAndFields))
        {
            TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;

            System.Collections.Hashtable childFields = new System.Collections.Hashtable();
            System.Collections.Hashtable nextChildFields = hasNext ? new System.Collections.Hashtable() : null;

            // Each per-field maps onto its consumer (and its chained per-field
            // when a next TermsHash exists); Hashtable used as a set.
            foreach (System.Collections.DictionaryEntry fieldEntry in (System.Collections.ICollection) entry.Value)
            {
                TermsHashPerField perField = (TermsHashPerField) fieldEntry.Key;
                childFields[perField.consumer] = perField.consumer;
                if (hasNext)
                    nextChildFields[perField.nextPerField] = perField.nextPerField;
            }

            childThreadsAndFields[perThread.consumer] = childFields;
            if (hasNext)
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
        }

        consumer.Flush(childThreadsAndFields, state);

        ShrinkFreePostings(threadsAndFields, state);

        if (hasNext)
            nextTermsHash.Flush(nextThreadsAndFields, state);
    }
}
/// <summary>Flush a new segment.  <paramref name="threads"/> holds the
/// per-thread processor states to flush (implementations may enumerate it
/// as dictionary entries).
/// </summary>
public abstract void Flush(System.Collections.ICollection threads, SegmentWriteState state);
/// <summary>Called when DocumentsWriter decides to close the doc
/// stores.  Implementations release their doc-store outputs for the
/// given flush state.
/// </summary>
public abstract void CloseDocStore(SegmentWriteState state);
/// <summary>
/// Closes the stored-fields doc-store files (fdt/fdx): pads the fdx index to
/// cover docs never seen, closes the writer, registers the files as flushed,
/// removes them from the open-files set, and verifies the fdx size.
/// </summary>
public void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        int inc = state.numDocsInStore - lastDocID;
        if (inc > 0)
        {
            // Lazily create the writer, then pad the fdx index out to cover
            // docs we never saw (they hit non-aborting exceptions).
            InitFieldsWriter();
            Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
        }

        if (fieldsWriter != null)
        {
            fieldsWriter.Close();
            fieldsWriter = null;
            lastDocID = 0;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

            state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
            state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

            System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

            // Sanity check: fdx must be exactly 4 header bytes + 8 bytes per stored doc.
            if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
                throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
        }
    }
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share alot of this...

/// <summary>
/// Flushes all buffered postings into the segment via the FormatPostings
/// writer chain, grouping per-thread field instances that share a field
/// name, then resets all per-field and per-thread state.
/// </summary>
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all
    // ThreadStates
    System.Collections.ArrayList allFields = new System.Collections.ArrayList();

    System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
    while (it.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;

        System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value;

        System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();

        while (fieldsIt.MoveNext())
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key;
            if (perField.termsHashPerField.numPostings > 0)
                allFields.Add(perField);
        }
    }

    // Sort by field name
    allFields.Sort();
    int numAllFields = allFields.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
    Current writer chain:
    FormatPostingsFieldsConsumer
    -> IMPL: FormatPostingsFieldsWriter
    -> FormatPostingsTermsConsumer
    -> IMPL: FormatPostingsTermsWriter
    -> FormatPostingsDocConsumer
    -> IMPL: FormatPostingsDocWriter
    -> FormatPostingsPositionsConsumer
    -> IMPL: FormatPostingsPositionsWriter
    */

    int start = 0;
    while (start < numAllFields)
    {
        // Group together all per-thread instances that share this field name
        // so their postings are appended as one logical field.
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField) allFields[start]).fieldInfo;
        System.String fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && ((FreqProxTermsWriterPerField) allFields[end]).fieldInfo.name.Equals(fieldName))
            end++;

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = (FreqProxTermsWriterPerField) allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(fields, consumer);

        // Free the per-field buffers now that their postings are written.
        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;
            int numPostings = perField.numPostings;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fields[i].Reset();
        }

        start = end;
    }

    // Reset each per-thread state for the next segment.
    it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
    while (it.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key;
        perThread.termsHashPerThread.Reset(true);
    }

    consumer.Finish();
}