public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) { var childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>(); var endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>>(); foreach (var entry in threadsAndFields) { var perThread = (DocInverterPerThread) entry.Key; ICollection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>(); ICollection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>(); foreach(DocFieldConsumerPerField field in entry.Value) { var perField = (DocInverterPerField)field; childFields.Add(perField.consumer); endChildFields.Add(perField.endConsumer); } childThreadsAndFields[perThread.consumer] = childFields; endChildThreadsAndFields[perThread.endConsumer] = endChildFields; } consumer.Flush(childThreadsAndFields, state); endConsumer.Flush(endChildThreadsAndFields, state); }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { if (tvx != null) { if (state.numDocsInStore > 0) // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); tvx.Flush(); tvd.Flush(); tvf.Flush(); } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator(); while (it2.MoveNext()) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key; perThread.termsHashPerThread.Reset(true); } } }
public override void Flush(SegmentWriteState state) { int numDocs = state.SegmentInfo.DocCount; if (numDocs > 0) { // It's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the FieldsWriter: InitFieldsWriter(state.Context); Fill(numDocs); } if (FieldsWriter != null) { bool success = false; try { FieldsWriter.Finish(state.FieldInfos, numDocs); success = true; } finally { if (success) { IOUtils.Close(FieldsWriter); } else { IOUtils.CloseWhileHandlingException(FieldsWriter); } } } }
public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) { Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> oneThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(); Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> twoThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(); foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> entry in new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields)) { DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key; IList<DocFieldConsumerPerField> fields = entry.Value; //IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator(); IList<DocFieldConsumerPerField> oneFields = new List<DocFieldConsumerPerField>(); IList<DocFieldConsumerPerField> twoFields = new List<DocFieldConsumerPerField>(); foreach (DocFieldConsumersPerField perField in fields) { oneFields.Add(perField.one); twoFields.Add(perField.two); } oneThreadsAndFields[perThread.one] = oneFields; twoThreadsAndFields[perThread.two] = twoFields; } one.Flush(oneThreadsAndFields, state); two.Flush(twoThreadsAndFields, state); }
public void CloseDocStore(SegmentWriteState state) { lock (this) { int inc = state.numDocsInStore - lastDocID; if (inc > 0) { InitFieldsWriter(); Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } if (fieldsWriter != null) { fieldsWriter.Close(); fieldsWriter = null; lastDocID = 0; System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName)) throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName)); } } }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary endChildThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; DocInverterPerThread perThread = (DocInverterPerThread) entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable childFields = new System.Collections.Hashtable(); System.Collections.Hashtable endChildFields = new System.Collections.Hashtable(); while (fieldsIt.MoveNext()) { DocInverterPerField perField = (DocInverterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key; childFields[perField.consumer] = perField.consumer; endChildFields[perField.endConsumer] = perField.endConsumer; } childThreadsAndFields[perThread.consumer] = childFields; endChildThreadsAndFields[perThread.endConsumer] = endChildFields; } consumer.Flush(childThreadsAndFields, state); endConsumer.Flush(endChildThreadsAndFields, state); }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary oneThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary twoThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable oneFields = new System.Collections.Hashtable(); System.Collections.Hashtable twoFields = new System.Collections.Hashtable(); while (fieldsIt.MoveNext()) { DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current; SupportClass.CollectionsHelper.AddIfNotContains(oneFields, perField.one); SupportClass.CollectionsHelper.AddIfNotContains(twoFields, perField.two); } oneThreadsAndFields[perThread.one] = oneFields; twoThreadsAndFields[perThread.two] = twoFields; } one.Flush(oneThreadsAndFields, state); two.Flush(twoThreadsAndFields, state); }
public override FieldsConsumer FieldsConsumer(SegmentWriteState state) { PostingsWriterBase docsWriter = null; PostingsWriterBase pulsingWriterInner = null; PostingsWriterBase pulsingWriter = null; // Terms dict bool success = false; try { docsWriter = new Lucene41PostingsWriter(state); pulsingWriterInner = new PulsingPostingsWriter(state, 2, docsWriter); pulsingWriter = new PulsingPostingsWriter(state, 1, pulsingWriterInner); FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); success = true; return ret; } finally { if (!success) { IOUtils.CloseWhileHandlingException(docsWriter, pulsingWriterInner, pulsingWriter); } } }
internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) : base() { this.parent = parent; termsOut = parent.termsOut; docsWriter = new FormatPostingsDocsWriter(state, this); }
public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) { var oneThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>(); var twoThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>(); foreach(var entry in threadsAndFields) { DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key; ICollection<DocFieldConsumerPerField> fields = entry.Value; IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator(); ICollection<DocFieldConsumerPerField> oneFields = new HashSet<DocFieldConsumerPerField>(); ICollection<DocFieldConsumerPerField> twoFields = new HashSet<DocFieldConsumerPerField>(); while (fieldsIt.MoveNext()) { DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current; oneFields.Add(perField.one); twoFields.Add(perField.two); } oneThreadsAndFields[perThread.one] = oneFields; twoThreadsAndFields[perThread.two] = twoFields; } one.Flush(oneThreadsAndFields, state); two.Flush(twoThreadsAndFields, state); }
public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) { Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> childThreadsAndFields = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>(); Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> endChildThreadsAndFields = new Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>>(); foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> entry in new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields)) { DocInverterPerThread perThread = (DocInverterPerThread)entry.Key; List<InvertedDocConsumerPerField> childFields = new List<InvertedDocConsumerPerField>(); List<InvertedDocEndConsumerPerField> endChildFields = new List<InvertedDocEndConsumerPerField>(); foreach (DocFieldConsumerPerField field in entry.Value) { DocInverterPerField perField = (DocInverterPerField)field; childFields.Add(perField.consumer); endChildFields.Add(perField.endConsumer); } childThreadsAndFields[perThread.consumer] = childFields; endChildThreadsAndFields[perThread.endConsumer] = endChildFields; } consumer.Flush(childThreadsAndFields, state); endConsumer.Flush(endChildThreadsAndFields, state); }
public override void CloseDocStore(SegmentWriteState state) { try { one.CloseDocStore(state); } finally { two.CloseDocStore(state); } }
public override void Flush(IDictionary<string, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) { IDictionary<string, InvertedDocConsumerPerField> childFieldsToFlush = new Dictionary<string, InvertedDocConsumerPerField>(); IDictionary<string, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new Dictionary<string, InvertedDocEndConsumerPerField>(); foreach (KeyValuePair<string, DocFieldConsumerPerField> fieldToFlush in fieldsToFlush) { DocInverterPerField perField = (DocInverterPerField)fieldToFlush.Value; childFieldsToFlush[fieldToFlush.Key] = perField.Consumer; endChildFieldsToFlush[fieldToFlush.Key] = perField.EndConsumer; } Consumer.Flush(childFieldsToFlush, state); EndConsumer.Flush(endChildFieldsToFlush, state); }
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base() { this.parent = parent; System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName); out_Renamed = parent.parent.dir.CreateOutput(fileName); totalNumDocs = parent.parent.totalNumDocs; // TODO: abstraction violation skipInterval = parent.parent.termsOut.skipInterval; skipListWriter = parent.parent.skipListWriter; skipListWriter.SetFreqOutput(out_Renamed); posWriter = new FormatPostingsPositionsWriter(state, this); }
public override FieldsConsumer FieldsConsumer(SegmentWriteState state) { PostingsWriterBase docs = new Lucene41PostingsWriter(state); // TODO: should we make the terms index more easily // pluggable? Ie so that this codec would record which // index impl was used, and switch on loading? // Or... you must make a new Codec for this? TermsIndexWriterBase indexWriter; bool success = false; try { indexWriter = new FixedGapTermsIndexWriter(state); success = true; } finally { if (!success) { docs.Dispose(); } } success = false; try { // Must use BlockTermsWriter (not BlockTree) because // BlockTree doesn't support ords (yet)... FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs); success = true; return ret; } finally { if (!success) { try { docs.Dispose(); } finally { indexWriter.Dispose(); } } } }
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent) { this.parent = parent; omitTermFreqAndPositions = parent.omitTermFreqAndPositions; if (parent.parent.parent.fieldInfos.HasProx()) { // At least one field does not omit TF, so create the // prox file System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION); state.flushedFiles.Add(fileName); out_Renamed = parent.parent.parent.dir.CreateOutput(fileName); parent.skipListWriter.SetProxOutput(out_Renamed); } // Every field omits TF so we will write no prox file else out_Renamed = null; }
internal override void Flush(IDictionary<string, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) { bool success = false; DocValuesConsumer normsConsumer = null; try { if (state.FieldInfos.HasNorms()) { NormsFormat normsFormat = state.SegmentInfo.Codec.NormsFormat(); Debug.Assert(normsFormat != null); normsConsumer = normsFormat.NormsConsumer(state); foreach (FieldInfo fi in state.FieldInfos) { NormsConsumerPerField toWrite = (NormsConsumerPerField)fieldsToFlush[fi.Name]; // we must check the final value of omitNorms for the fieldinfo, it could have // changed for this field since the first time we added it. if (!fi.OmitsNorms()) { if (toWrite != null && !toWrite.Empty) { toWrite.Flush(state, normsConsumer); Debug.Assert(fi.NormType == DocValuesType.NUMERIC); } else if (fi.Indexed) { Debug.Assert(fi.NormType == null, "got " + fi.NormType + "; field=" + fi.Name); } } } } success = true; } finally { if (success) { IOUtils.Close(normsConsumer); } else { IOUtils.CloseWhileHandlingException(normsConsumer); } } }
public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state) { Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> childThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(); foreach (DocFieldProcessorPerThread perThread in threads.Keys) { childThreadsAndFields[perThread.consumer] = perThread.Fields(); perThread.TrimFields(state); } fieldsWriter.Flush(state); consumer.Flush(childThreadsAndFields, state); // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, // FreqProxTermsWriter does this with // FieldInfo.storePayload. System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION); fieldInfos.Write(state.directory, fileName); state.flushedFiles.Add(fileName); }
public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base() { dir = state.directory; segment = state.segmentName; totalNumDocs = state.numDocs; this.fieldInfos = fieldInfos; termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval); // TODO: this is a nasty abstraction violation (that we // peek down to find freqOut/proxOut) -- we need a // better abstraction here whereby these child consumers // can provide skip data or not skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)); termsWriter = new FormatPostingsTermsWriter(state, this); }
public override FieldsConsumer FieldsConsumer(SegmentWriteState state) { PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockIntFactory(baseBlockSize)); bool success = false; TermsIndexWriterBase indexWriter; try { indexWriter = new FixedGapTermsIndexWriter(state); success = true; } finally { if (!success) { postingsWriter.Dispose(); } } success = false; try { FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter); success = true; return ret; } finally { if (!success) { try { postingsWriter.Dispose(); } finally { indexWriter.Dispose(); } } } }
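Each of the FieldsConsumer examples above follows the same guard pattern: build the postings writer chain from the SegmentWriteState, and if wiring the terms dictionary fails, dispose the partially built writers via IOUtils.CloseWhileHandlingException so the original exception is not masked. A minimal sketch of that pattern with a single writer (a hypothetical codec method, assuming only the types already used in the examples above):

public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
{
    // The postings writer is created first; it owns the low-level postings outputs.
    PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);
    bool success = false;
    try
    {
        // The terms dictionary wraps the postings writer.
        FieldsConsumer ret = new BlockTreeTermsWriter(state, postingsWriter,
            BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
            BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
        success = true;
        return ret;
    }
    finally
    {
        if (!success)
        {
            // Dispose the partially constructed chain without masking the primary exception.
            IOUtils.CloseWhileHandlingException(postingsWriter);
        }
    }
}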
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { // NOTE: it's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the TermVectorsWriter. This is // actually OK (unlike in the stored fields case) // because, although FieldInfos.hasVectors() will return // true, the TermVectorsReader gracefully handles // non-existence of the term vectors files. if (tvx != null) { if (state.numDocsInStore > 0) // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); tvx.Flush(); tvd.Flush(); tvf.Flush(); } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator(); while (it2.MoveNext()) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key; perThread.termsHashPerThread.Reset(true); } } }
public override void Flush(System.Collections.ICollection threads, SegmentWriteState state) { System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = threads.GetEnumerator(); while (it.MoveNext()) { DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) ((System.Collections.DictionaryEntry) it.Current).Key; childThreadsAndFields[perThread.consumer] = perThread.Fields(); perThread.TrimFields(state); } fieldsWriter.Flush(state); consumer.Flush(childThreadsAndFields, state); // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, // FreqProxTermsWriter does this with // FieldInfo.storePayload. System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION); fieldInfos.Write(state.directory, fileName); SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName); }
public void Flush(SegmentWriteState state) { lock (this) { if (state.numDocsInStore > 0) { // It's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the FieldsWriter: InitFieldsWriter(); // Fill fdx file to include any final docs that we // skipped because they hit non-aborting exceptions Fill(state.numDocsInStore - docWriter.DocStoreOffset); } if (fieldsWriter != null) fieldsWriter.Flush(); } }
public override void Flush(Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state) { lock (this) { // NOTE: it's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the TermVectorsWriter. This is // actually OK (unlike in the stored fields case) // because, although FieldInfos.hasVectors() will return // true, the TermVectorsReader gracefully handles // non-existence of the term vectors files. if (tvx != null) { if (state.numDocsInStore > 0) // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); tvx.Flush(); tvd.Flush(); tvf.Flush(); } foreach(KeyValuePair<TermsHashConsumerPerThread,IList<TermsHashConsumerPerField>> entry in threadsAndFields) { foreach (TermsHashConsumerPerField field in entry.Value) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key; perThread.termsHashPerThread.Reset(true); } } }
// TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { // Gather all FieldData's that have postings, across all // ThreadStates System.Collections.ArrayList allFields = new System.Collections.ArrayList(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); while (fieldsIt.MoveNext()) { FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; if (perField.termsHashPerField.numPostings > 0) { allFields.Add(perField); } } } // Sort by field name allFields.Sort(); int numAllFields = allFields.Count; // TODO: allow Lucene user to customize this consumer: FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos); /* * Current writer chain: * FormatPostingsFieldsConsumer * -> IMPL: FormatPostingsFieldsWriter * -> FormatPostingsTermsConsumer * -> IMPL: FormatPostingsTermsWriter * -> FormatPostingsDocConsumer * -> IMPL: FormatPostingsDocWriter * -> FormatPostingsPositionsConsumer * -> IMPL: FormatPostingsPositionsWriter */ int start = 0; while (start < numAllFields) { FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo; System.String fieldName = fieldInfo.name; int end = start + 1; while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName)) { end++; } FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start]; for (int i = start; i < end; i++) { fields[i - start] = (FreqProxTermsWriterPerField)allFields[i]; // Aggregate the storePayload as seen by the same // field across multiple threads fieldInfo.storePayloads |= fields[i - start].hasPayloads; } // If this field has postings then add them to the // segment AppendPostings(fields, consumer); for (int i = 0; i < fields.Length; i++) { TermsHashPerField perField = fields[i].termsHashPerField; int numPostings = perField.numPostings; perField.Reset(); perField.ShrinkHash(numPostings); fields[i].Reset(); } start = end; } it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key; perThread.termsHashPerThread.Reset(true); } consumer.Finish(); }
internal MergeState Merge() { if (!ShouldMerge) { throw new InvalidOperationException("Merge would result in 0 document segment"); } // NOTE: it's important to add calls to // checkAbort.work(...) if you make any changes to this // method that will spend alot of time. The frequency // of this check impacts how long // IndexWriter.close(false) takes to actually stop the // threads. MergeFieldInfos(); SetMatchingSegmentReaders(); long t0 = 0; if (mergeState.InfoStream.IsEnabled("SM")) { t0 = Time.NanoTime(); } int numMerged = MergeFields(); if (mergeState.InfoStream.IsEnabled("SM")) { long t1 = Time.NanoTime(); mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge stored fields [" + numMerged + " docs]"); } if (Debugging.AssertsEnabled) { Debugging.Assert(numMerged == mergeState.SegmentInfo.DocCount); } SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.InfoStream, directory, mergeState.SegmentInfo, mergeState.FieldInfos, termIndexInterval, null, context); if (mergeState.InfoStream.IsEnabled("SM")) { t0 = Time.NanoTime(); } MergeTerms(segmentWriteState); if (mergeState.InfoStream.IsEnabled("SM")) { long t1 = Time.NanoTime(); mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge postings [" + numMerged + " docs]"); } if (mergeState.InfoStream.IsEnabled("SM")) { t0 = Time.NanoTime(); } if (mergeState.FieldInfos.HasDocValues) { MergeDocValues(segmentWriteState); } if (mergeState.InfoStream.IsEnabled("SM")) { long t1 = Time.NanoTime(); mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge doc values [" + numMerged + " docs]"); } if (mergeState.FieldInfos.HasNorms) { if (mergeState.InfoStream.IsEnabled("SM")) { t0 = Time.NanoTime(); } MergeNorms(segmentWriteState); if (mergeState.InfoStream.IsEnabled("SM")) { long t1 = Time.NanoTime(); mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge norms [" + numMerged + " docs]"); } } if (mergeState.FieldInfos.HasVectors) { if (mergeState.InfoStream.IsEnabled("SM")) { t0 = Time.NanoTime(); } numMerged = MergeVectors(); if (mergeState.InfoStream.IsEnabled("SM")) { long t1 = Time.NanoTime(); mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge vectors [" + numMerged + " docs]"); } if (Debugging.AssertsEnabled) { Debugging.Assert(numMerged == mergeState.SegmentInfo.DocCount); } } // write the merged infos FieldInfosWriter fieldInfosWriter = codec.FieldInfosFormat.FieldInfosWriter; fieldInfosWriter.Write(directory, mergeState.SegmentInfo.Name, "", mergeState.FieldInfos, context); return(mergeState); }
internal virtual FlushedSegment Flush() { Debug.Assert(numDocsInRAM > 0); Debug.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush"); segmentInfo.DocCount = numDocsInRAM; SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed))); double startMBUsed = BytesUsed / 1024.0 / 1024.0; // Apply delete-by-docID now (delete-byDocID only // happens when an exception is hit processing that // doc, eg if analyzer has some problem w/ the text): if (pendingUpdates.docIDs.Count > 0) { flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM); foreach (int delDocID in pendingUpdates.docIDs) { flushState.LiveDocs.Clear(delDocID); } flushState.DelCountOnFlush = pendingUpdates.docIDs.Count; pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID); pendingUpdates.docIDs.Clear(); } if (aborting) { if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush: skip because aborting is set"); } return(null); } if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM); } bool success = false; try { consumer.Flush(flushState); pendingUpdates.terms.Clear(); segmentInfo.SetFiles(new HashSet <string>(directory.CreatedFiles)); SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L); if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs"); infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs")); infoStream.Message("DWPT", "flushedFiles=" + Arrays.ToString(segmentInfoPerCommit.GetFiles())); infoStream.Message("DWPT", "flushed codec=" + codec); } BufferedUpdates segmentDeletes; if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0) { pendingUpdates.Clear(); segmentDeletes = null; } else { segmentDeletes = pendingUpdates; } if (infoStream.IsEnabled("DWPT")) { double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0; infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf)); } Debug.Assert(segmentInfo != null); FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush); SealFlushedSegment(fs); success = true; return(fs); } finally { if (!success) { Abort(filesToDelete); } } }
/// <summary>Produce _X.nrm if any document had a field with norms /// not disabled /// </summary> public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary byField = new System.Collections.Hashtable(); // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the // same FieldInfo System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.ArrayList fieldsToRemove = new System.Collections.ArrayList(); while (fieldsIt.MoveNext()) { NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; if (perField.upto > 0) { // It has some norms System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo]; if (l == null) { l = new System.Collections.ArrayList(); byField[perField.fieldInfo] = l; } l.Add(perField); } // Remove this field since we haven't seen it // since the previous flush else { fieldsToRemove.Add(perField); } } System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields; for (int i = 0; i < fieldsToRemove.Count; i++) { fieldsHT.Remove(fieldsToRemove[i]); } } System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION; state.flushedFiles[normsFileName] = normsFileName; IndexOutput normsOut = state.directory.CreateOutput(normsFileName); try { normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length); int numField = fieldInfos.Size(); int normCount = 0; for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++) { FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo]; int upto = 0; if (toMerge != null) { int numFields = toMerge.Count; normCount++; NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; int[] uptos = new int[numFields]; for (int j = 0; j < numFields; j++) { fields[j] = (NormsWriterPerField)toMerge[j]; } int numLeft = numFields; while (numLeft > 0) { System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" + (fields [0].docIDs.Length)); int minLoc = 0; int minDocID = fields[0].docIDs[uptos[0]]; for (int j = 1; j < numLeft; j++) { int docID = fields[j].docIDs[uptos[j]]; if (docID < minDocID) { minDocID = docID; minLoc = j; } } System.Diagnostics.Debug.Assert(minDocID < state.numDocs); // Fill hole for (; upto < minDocID; upto++) { normsOut.WriteByte(defaultNorm); } normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]); (uptos[minLoc])++; upto++; if (uptos[minLoc] == fields[minLoc].upto) { fields[minLoc].Reset(); if (minLoc != numLeft - 1) { fields[minLoc] = fields[numLeft - 1]; uptos[minLoc] = uptos[numLeft - 1]; } numLeft--; } } // Fill final hole with defaultNorm for (; upto < state.numDocs; upto++) { normsOut.WriteByte(defaultNorm); } } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) { normCount++; // Fill entire field with default norm: for (; upto < state.numDocs; upto++) { normsOut.WriteByte(defaultNorm); } } System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == 
normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer()); } } finally { normsOut.Close(); } }
/// <summary> /// Walk through all unique text tokens (Posting /// instances) found in this field and serialize them /// into a single RAM segment. /// </summary> internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state) { if (!fieldInfo.IsIndexed) { return; // nothing to flush, don't bother the codec with the unindexed field } TermsConsumer termsConsumer = consumer.AddField(fieldInfo); IComparer <BytesRef> termComp = termsConsumer.Comparer; // CONFUSING: this.indexOptions holds the index options // that were current when we first saw this field. But // it's possible this has changed, eg when other // documents are indexed that cause a "downgrade" of the // IndexOptions. So we must decode the in-RAM buffer // according to this.indexOptions, but then write the // new segment to the directory according to // currentFieldIndexOptions: IndexOptions currentFieldIndexOptions = fieldInfo.IndexOptions; Debug.Assert(currentFieldIndexOptions != IndexOptions.NONE); bool writeTermFreq = currentFieldIndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS) >= 0; bool writePositions = currentFieldIndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; bool writeOffsets = currentFieldIndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; bool readTermFreq = this.hasFreq; bool readPositions = this.hasProx; bool readOffsets = this.hasOffsets; //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets); // Make sure FieldInfo.update is working correctly!: Debug.Assert(!writeTermFreq || readTermFreq); Debug.Assert(!writePositions || readPositions); Debug.Assert(!writeOffsets || readOffsets); Debug.Assert(!writeOffsets || writePositions); IDictionary <Term, int?> segDeletes; if (state.SegUpdates != null && state.SegUpdates.terms.Count > 0) { segDeletes = state.SegUpdates.terms; } else { segDeletes = null; } int[] termIDs = termsHashPerField.SortPostings(termComp); int numTerms = termsHashPerField.bytesHash.Count; BytesRef text = new BytesRef(); FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray; ByteSliceReader freq = new ByteSliceReader(); ByteSliceReader prox = new ByteSliceReader(); FixedBitSet visitedDocs = new FixedBitSet(state.SegmentInfo.DocCount); long sumTotalTermFreq = 0; long sumDocFreq = 0; Term protoTerm = new Term(fieldName); for (int i = 0; i < numTerms; i++) { int termID = termIDs[i]; // Get BytesRef int textStart = postings.textStarts[termID]; termsHashPerField.bytePool.SetBytesRef(text, textStart); termsHashPerField.InitReader(freq, termID, 0); if (readPositions || readOffsets) { termsHashPerField.InitReader(prox, termID, 1); } // TODO: really TermsHashPerField should take over most // of this loop, including merge sort of terms from // multiple threads and interacting with the // TermsConsumer, only calling out to us (passing us the // DocsConsumer) to handle delivery of docs/positions PostingsConsumer postingsConsumer = termsConsumer.StartTerm(text); int?delDocLimit; if (segDeletes != null) { protoTerm.Bytes = text; int?docIDUpto; segDeletes.TryGetValue(protoTerm, out docIDUpto); if (docIDUpto != null) { delDocLimit = docIDUpto; } else { delDocLimit = 0; } } else { delDocLimit = 0; } // Now termStates has numToMerge FieldMergeStates // which all share the same term. Now we must // interleave the docID streams. 
int docFreq = 0; long totalTermFreq = 0; int docID = 0; while (true) { //System.out.println(" cycle"); int termFreq; if (freq.Eof()) { if (postings.lastDocCodes[termID] != -1) { // Return last doc docID = postings.lastDocIDs[termID]; if (readTermFreq) { termFreq = postings.termFreqs[termID]; } else { termFreq = -1; } postings.lastDocCodes[termID] = -1; } else { // EOF break; } } else { int code = freq.ReadVInt32(); if (!readTermFreq) { docID += code; termFreq = -1; } else { docID += (int)((uint)code >> 1); if ((code & 1) != 0) { termFreq = 1; } else { termFreq = freq.ReadVInt32(); } } Debug.Assert(docID != postings.lastDocIDs[termID]); } docFreq++; Debug.Assert(docID < state.SegmentInfo.DocCount, "doc=" + docID + " maxDoc=" + state.SegmentInfo.DocCount); // NOTE: we could check here if the docID was // deleted, and skip it. However, this is somewhat // dangerous because it can yield non-deterministic // behavior since we may see the docID before we see // the term that caused it to be deleted. this // would mean some (but not all) of its postings may // make it into the index, which'd alter the docFreq // for those terms. We could fix this by doing two // passes, ie first sweep marks all del docs, and // 2nd sweep does the real flush, but I suspect // that'd add too much time to flush. visitedDocs.Set(docID); postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1); if (docID < delDocLimit) { // Mark it deleted. TODO: we could also skip // writing its postings; this would be // deterministic (just for this Term's docs). // TODO: can we do this reach-around in a cleaner way???? if (state.LiveDocs == null) { state.LiveDocs = docState.docWriter.codec.LiveDocsFormat.NewLiveDocs(state.SegmentInfo.DocCount); } if (state.LiveDocs.Get(docID)) { state.DelCountOnFlush++; state.LiveDocs.Clear(docID); } } totalTermFreq += termFreq; // Carefully copy over the prox + payload info, // changing the format to match Lucene's segment // format. if (readPositions || readOffsets) { // we did record positions (& maybe payload) and/or offsets int position = 0; int offset = 0; for (int j = 0; j < termFreq; j++) { BytesRef thisPayload; if (readPositions) { int code = prox.ReadVInt32(); position += (int)((uint)code >> 1); if ((code & 1) != 0) { // this position has a payload int payloadLength = prox.ReadVInt32(); if (payload == null) { payload = new BytesRef(); payload.Bytes = new byte[payloadLength]; } else if (payload.Bytes.Length < payloadLength) { payload.Grow(payloadLength); } prox.ReadBytes(payload.Bytes, 0, payloadLength); payload.Length = payloadLength; thisPayload = payload; } else { thisPayload = null; } if (readOffsets) { int startOffset = offset + prox.ReadVInt32(); int endOffset = startOffset + prox.ReadVInt32(); if (writePositions) { if (writeOffsets) { Debug.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset); postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset); } else { postingsConsumer.AddPosition(position, thisPayload, -1, -1); } } offset = startOffset; } else if (writePositions) { postingsConsumer.AddPosition(position, thisPayload, -1, -1); } } } } postingsConsumer.FinishDoc(); } termsConsumer.FinishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1)); sumTotalTermFreq += totalTermFreq; sumDocFreq += docFreq; } termsConsumer.Finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality()); }
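For context on the decode loop above: the in-RAM freq stream packs each posting as a delta-coded VInt whose low bit flags a term frequency of one; when frequencies are omitted entirely, the raw delta is written instead (the !readTermFreq branch). A matching encode-side sketch (a hypothetical helper, not part of the classes shown here) of the layout being decoded:

// Hypothetical illustration of the doc/freq packing that the flush loop reads back.
// docDelta is docID - lastDocID; when termFreq == 1 the low bit of the code is set
// and no separate freq VInt follows.
static void WriteDocAndFreq(DataOutput output, int docDelta, int termFreq)
{
    if (termFreq == 1)
    {
        output.WriteVInt32((docDelta << 1) | 1);
    }
    else
    {
        output.WriteVInt32(docDelta << 1);
        output.WriteVInt32(termFreq);
    }
}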
public abstract void Flush(SegmentWriteState state);
public override void Flush(IDictionary <DocFieldConsumerPerThread, ICollection <DocFieldConsumerPerField> > threadsAndFields, SegmentWriteState state) { var childThreadsAndFields = new HashMap <InvertedDocConsumerPerThread, ICollection <InvertedDocConsumerPerField> >(); var endChildThreadsAndFields = new HashMap <InvertedDocEndConsumerPerThread, ICollection <InvertedDocEndConsumerPerField> >(); foreach (var entry in threadsAndFields) { var perThread = (DocInverterPerThread)entry.Key; ICollection <InvertedDocConsumerPerField> childFields = new HashSet <InvertedDocConsumerPerField>(); ICollection <InvertedDocEndConsumerPerField> endChildFields = new HashSet <InvertedDocEndConsumerPerField>(); foreach (DocFieldConsumerPerField field in entry.Value) { var perField = (DocInverterPerField)field; childFields.Add(perField.consumer); endChildFields.Add(perField.endConsumer); } childThreadsAndFields[perThread.consumer] = childFields; endChildThreadsAndFields[perThread.endConsumer] = endChildFields; } consumer.Flush(childThreadsAndFields, state); endConsumer.Flush(endChildThreadsAndFields, state); }
public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) { lock (this) { if (Debugging.AssertsEnabled) { Debugging.Assert(Monitor.IsEntered(writer)); } //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); if (Debugging.AssertsEnabled) { Debugging.Assert(dvUpdates.Any()); } // Do this so we can delete any created files on // exception; this saves all codecs from having to do // it: TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); FieldInfos fieldInfos = null; bool success = false; try { Codec codec = Info.Info.Codec; // reader could be null e.g. for a just merged segment (from // IndexWriter.commitMergedDeletes). SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE); try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen foreach (FieldInfo fi in reader.FieldInfos) { FieldInfo clone = builder.Add(fi); // copy the stuff FieldInfos.Builder doesn't copy if (fi.Attributes != null) { foreach (KeyValuePair <string, string> e in fi.Attributes) { clone.PutAttribute(e.Key, e.Value); } } clone.DocValuesGen = fi.DocValuesGen; } // create new fields or update existing ones to have NumericDV type foreach (string f in dvUpdates.numericDVUpdates.Keys) { builder.AddOrUpdate(f, NumericDocValuesField.TYPE); } // create new fields or update existing ones to have BinaryDV type foreach (string f in dvUpdates.binaryDVUpdates.Keys) { builder.AddOrUpdate(f, BinaryDocValuesField.TYPE); } fieldInfos = builder.Finish(); long nextFieldInfosGen = Info.NextFieldInfosGen; string segmentSuffix = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX)); SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix); DocValuesFormat docValuesFormat = codec.DocValuesFormat; DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state); bool fieldsConsumerSuccess = false; try { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates); foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates) { string field = e.Key; NumericDocValuesFieldUpdates fieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); if (Debugging.AssertsEnabled) { Debugging.Assert(fieldInfo != null); } fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates)); } // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates); foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates) { string field = e.Key; BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value; FieldInfo fieldInfo = fieldInfos.FieldInfo(field); if (Debugging.AssertsEnabled) { Debugging.Assert(fieldInfo != null); } // System.out.println("[" + 
Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates); fieldInfo.DocValuesGen = nextFieldInfosGen; // write the numeric updates to a new gen'd docvalues file fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates)); } codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT); fieldsConsumerSuccess = true; } finally { if (fieldsConsumerSuccess) { fieldsConsumer.Dispose(); } else { IOUtils.DisposeWhileHandlingException(fieldsConsumer); } } } finally { if (reader != this.reader) { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); reader.Dispose(); } } success = true; } finally { if (!success) { // Advance only the nextWriteDocValuesGen so that a 2nd // attempt to write will write to a new file Info.AdvanceNextWriteFieldInfosGen(); // Delete any partially created file(s): foreach (string fileName in trackingDir.CreatedFiles) { try { dir.DeleteFile(fileName); } catch (Exception t) when(t.IsThrowable()) { // Ignore so we throw only the first exc } } } } Info.AdvanceFieldInfosGen(); // copy all the updates to mergingUpdates, so they can later be applied to the merged segment if (isMerging) { foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates) { if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates)) { mergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates) { if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates)) { mergingDVUpdates[e.Key] = e.Value; } else { updates.Merge(e.Value); } } } // create a new map, keeping only the gens that are in use IDictionary <long, ISet <string> > genUpdatesFiles = Info.UpdatesFiles; IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >(); long fieldInfosGen = Info.FieldInfosGen; foreach (FieldInfo fi in fieldInfos) { long dvGen = fi.DocValuesGen; if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen)) { if (dvGen == fieldInfosGen) { newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles; } else { newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen]; } } } Info.SetGenUpdatesFiles(newGenUpdatesFiles); // wrote new files, should checkpoint() writer.Checkpoint(); // if there is a reader open, reopen it to reflect the updates if (reader != null) { SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount); bool reopened = false; try { reader.DecRef(); reader = newReader; reopened = true; } finally { if (!reopened) { newReader.DecRef(); } } } } }
public abstract void Flush(IDictionary <TermsHashConsumerPerThread, ICollection <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state);
internal override void CloseDocStore(SegmentWriteState state) { }
public abstract void Flush(SegmentWriteState state, DocValuesConsumer consumer);
/// <summary> /// Merges the readers into the directory passed to the constructor </summary> /// <returns> The number of documents that were merged </returns> /// <exception cref="CorruptIndexException"> if the index is corrupt </exception> /// <exception cref="IOException"> if there is a low-level IO error </exception> public MergeState Merge() { if (!ShouldMerge()) { throw new InvalidOperationException("Merge would result in 0 document segment"); } // NOTE: it's important to add calls to // checkAbort.work(...) if you make any changes to this // method that will spend alot of time. The frequency // of this check impacts how long // IndexWriter.close(false) takes to actually stop the // threads. MergeFieldInfos(); SetMatchingSegmentReaders(); long t0 = 0; if (MergeState.InfoStream.IsEnabled("SM")) { t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; } int numMerged = MergeFields(); if (MergeState.InfoStream.IsEnabled("SM")) { long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge stored fields [" + numMerged + " docs]"); } Debug.Assert(numMerged == MergeState.SegmentInfo.DocCount); SegmentWriteState segmentWriteState = new SegmentWriteState(MergeState.InfoStream, Directory, MergeState.SegmentInfo, MergeState.FieldInfos, TermIndexInterval, null, Context); if (MergeState.InfoStream.IsEnabled("SM")) { t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; } MergeTerms(segmentWriteState); if (MergeState.InfoStream.IsEnabled("SM")) { long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge postings [" + numMerged + " docs]"); } if (MergeState.InfoStream.IsEnabled("SM")) { t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; } if (MergeState.FieldInfos.HasDocValues()) { MergeDocValues(segmentWriteState); } if (MergeState.InfoStream.IsEnabled("SM")) { long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge doc values [" + numMerged + " docs]"); } if (MergeState.FieldInfos.HasNorms()) { if (MergeState.InfoStream.IsEnabled("SM")) { t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; } MergeNorms(segmentWriteState); if (MergeState.InfoStream.IsEnabled("SM")) { long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge norms [" + numMerged + " docs]"); } } if (MergeState.FieldInfos.HasVectors()) { if (MergeState.InfoStream.IsEnabled("SM")) { t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; } numMerged = MergeVectors(); if (MergeState.InfoStream.IsEnabled("SM")) { long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge vectors [" + numMerged + " docs]"); } Debug.Assert(numMerged == MergeState.SegmentInfo.DocCount); } // write the merged infos FieldInfosWriter fieldInfosWriter = Codec.FieldInfosFormat().FieldInfosWriter; fieldInfosWriter.Write(Directory, MergeState.SegmentInfo.Name, "", MergeState.FieldInfos, Context); return MergeState; }
internal void ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer); int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount); if (newSize != postingsFreeList.Length) { RawPostingList[] newArray = new RawPostingList[newSize]; Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount); postingsFreeList = newArray; } }
internal override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary nextThreadsAndFields; if (nextTermsHash != null) { nextThreadsAndFields = new System.Collections.Hashtable(); } else { nextThreadsAndFields = null; } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; TermsHashPerThread perThread = (TermsHashPerThread)entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable childFields = new System.Collections.Hashtable(); System.Collections.Hashtable nextChildFields; if (nextTermsHash != null) { nextChildFields = new System.Collections.Hashtable(); } else { nextChildFields = null; } while (fieldsIt.MoveNext()) { TermsHashPerField perField = (TermsHashPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; childFields[perField.consumer] = perField.consumer; if (nextTermsHash != null) { nextChildFields[perField.nextPerField] = perField.nextPerField; } } childThreadsAndFields[perThread.consumer] = childFields; if (nextTermsHash != null) { nextThreadsAndFields[perThread.nextPerThread] = nextChildFields; } } consumer.Flush(childThreadsAndFields, state); ShrinkFreePostings(threadsAndFields, state); if (nextTermsHash != null) { nextTermsHash.Flush(nextThreadsAndFields, state); } } }
public abstract void CloseDocStore(SegmentWriteState state);
internal abstract void CloseDocStore(SegmentWriteState state);
// LUCENENET specific - original was internal, but FreqProxTermsWriter requires public (little point, since both are internal classes) public override void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state) { if (writer != null) { int numDocs = state.SegmentInfo.DocCount; Debug.Assert(numDocs > 0); // At least one doc in this run had term vectors enabled try { Fill(numDocs); Debug.Assert(state.SegmentInfo != null); writer.Finish(state.FieldInfos, numDocs); } finally { IOUtils.Dispose(writer); writer = null; lastDocID = 0; hasVectors = false; } } foreach (TermsHashConsumerPerField field in fieldsToFlush.Values) { TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField)field; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } }
public override void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state) { // Gather all FieldData's that have postings, across all // ThreadStates IList <FreqProxTermsWriterPerField> allFields = new List <FreqProxTermsWriterPerField>(); foreach (TermsHashConsumerPerField f in fieldsToFlush.Values) { FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)f; if (perField.termsHashPerField.bytesHash.Count > 0) { allFields.Add(perField); } } int numAllFields = allFields.Count; // Sort by field name CollectionUtil.IntroSort(allFields); FieldsConsumer consumer = state.SegmentInfo.Codec.PostingsFormat.FieldsConsumer(state); bool success = false; try { TermsHash termsHash = null; /* * Current writer chain: * FieldsConsumer * -> IMPL: FormatPostingsTermsDictWriter * -> TermsConsumer * -> IMPL: FormatPostingsTermsDictWriter.TermsWriter * -> DocsConsumer * -> IMPL: FormatPostingsDocsWriter * -> PositionsConsumer * -> IMPL: FormatPostingsPositionsWriter */ for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) { FieldInfo fieldInfo = allFields[fieldNumber].fieldInfo; FreqProxTermsWriterPerField fieldWriter = allFields[fieldNumber]; // If this field has postings then add them to the // segment fieldWriter.Flush(fieldInfo.Name, consumer, state); TermsHashPerField perField = fieldWriter.termsHashPerField; if (Debugging.AssertsEnabled) { Debugging.Assert(termsHash == null || termsHash == perField.termsHash); } termsHash = perField.termsHash; int numPostings = perField.bytesHash.Count; perField.Reset(); perField.ShrinkHash(/* numPostings // LUCENENET: Not used */); fieldWriter.Reset(); } if (termsHash != null) { termsHash.Reset(); } success = true; } finally { if (success) { IOUtils.Dispose(consumer); } else { IOUtils.DisposeWhileHandlingException(consumer); } } }
internal override void Flush(IDictionary <string, InvertedDocConsumerPerField> fieldsToFlush, SegmentWriteState state) { IDictionary <string, TermsHashConsumerPerField> childFields = new Dictionary <string, TermsHashConsumerPerField>(); IDictionary <string, InvertedDocConsumerPerField> nextChildFields; if (nextTermsHash != null) { nextChildFields = new Dictionary <string, InvertedDocConsumerPerField>(); } else { nextChildFields = null; } foreach (KeyValuePair <string, InvertedDocConsumerPerField> entry in fieldsToFlush) { TermsHashPerField perField = (TermsHashPerField)entry.Value; childFields[entry.Key] = perField.consumer; if (nextTermsHash != null) { nextChildFields[entry.Key] = perField.nextPerField; } } consumer.Flush(childFields, state); if (nextTermsHash != null) { nextTermsHash.Flush(nextChildFields, state); } }
internal override void CloseDocStore(SegmentWriteState state) { }
internal abstract void Flush(IDictionary <string, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state);
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
public override void Flush(Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all
    // ThreadStates
    List<FreqProxTermsWriterPerField> allFields = new List<FreqProxTermsWriterPerField>();

    foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> entry in threadsAndFields)
    {
        IList<TermsHashConsumerPerField> fields = entry.Value;
        foreach (TermsHashConsumerPerField i in fields)
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
            if (perField.termsHashPerField.numPostings > 0)
            {
                allFields.Add(perField);
            }
        }
    }

    // Sort by field name
    allFields.Sort();

    int numAllFields = allFields.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
     * Current writer chain:
     *   FormatPostingsFieldsConsumer
     *     -> IMPL: FormatPostingsFieldsWriter
     *       -> FormatPostingsTermsConsumer
     *         -> IMPL: FormatPostingsTermsWriter
     *           -> FormatPostingsDocConsumer
     *             -> IMPL: FormatPostingsDocWriter
     *               -> FormatPostingsPositionsConsumer
     *                 -> IMPL: FormatPostingsPositionsWriter
     */

    int start = 0;
    while (start < numAllFields)
    {
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
        System.String fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
        {
            end++;
        }

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(fields, consumer);

        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;
            int numPostings = perField.numPostings;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fields[i].Reset();
        }

        start = end;
    }

    foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> entry in threadsAndFields)
    {
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
        perThread.termsHashPerThread.Reset(true);
    }

    consumer.Finish();
}
internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) : base() { this.parent = parent; termsOut = parent.termsOut; docsWriter = new FormatPostingsDocsWriter(state, this); }
private void MergeDocValues(SegmentWriteState segmentWriteState) { DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState); bool success = false; try { foreach (FieldInfo field in mergeState.FieldInfos) { DocValuesType type = field.DocValuesType; if (type != DocValuesType.NONE) { if (type == DocValuesType.NUMERIC) { IList <NumericDocValues> toMerge = new List <NumericDocValues>(); IList <IBits> docsWithField = new List <IBits>(); foreach (AtomicReader reader in mergeState.Readers) { NumericDocValues values = reader.GetNumericDocValues(field.Name); IBits bits = reader.GetDocsWithField(field.Name); if (values == null) { values = DocValues.EMPTY_NUMERIC; bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc); } toMerge.Add(values); docsWithField.Add(bits); } consumer.MergeNumericField(field, mergeState, toMerge, docsWithField); } else if (type == DocValuesType.BINARY) { IList <BinaryDocValues> toMerge = new List <BinaryDocValues>(); IList <IBits> docsWithField = new List <IBits>(); foreach (AtomicReader reader in mergeState.Readers) { BinaryDocValues values = reader.GetBinaryDocValues(field.Name); IBits bits = reader.GetDocsWithField(field.Name); if (values == null) { values = DocValues.EMPTY_BINARY; bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc); } toMerge.Add(values); docsWithField.Add(bits); } consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField); } else if (type == DocValuesType.SORTED) { IList <SortedDocValues> toMerge = new List <SortedDocValues>(); foreach (AtomicReader reader in mergeState.Readers) { SortedDocValues values = reader.GetSortedDocValues(field.Name); if (values == null) { values = DocValues.EMPTY_SORTED; } toMerge.Add(values); } consumer.MergeSortedField(field, mergeState, toMerge); } else if (type == DocValuesType.SORTED_SET) { IList <SortedSetDocValues> toMerge = new List <SortedSetDocValues>(); foreach (AtomicReader reader in mergeState.Readers) { SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name); if (values == null) { values = DocValues.EMPTY_SORTED_SET; } toMerge.Add(values); } consumer.MergeSortedSetField(field, mergeState, toMerge); } else { throw new InvalidOperationException("type=" + type); } } } success = true; } finally { if (success) { IOUtils.Dispose(consumer); } else { IOUtils.DisposeWhileHandlingException(consumer); } } }
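// --------------------------------------------------------------------------
// Hedged illustration (not part of the sources above): MergeDocValues merges
// whatever per-segment doc values the readers expose; on the writer side those
// come from doc values fields added at index time. Sketch assuming Lucene.NET
// 4.8; all field names and values are illustrative.
using Lucene.Net.Analysis.Core;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

public static class DocValuesMergeSketch
{
    public static void Main()
    {
        using (var dir = new RAMDirectory())
        using (var writer = new IndexWriter(dir,
            new IndexWriterConfig(LuceneVersion.LUCENE_48, new KeywordAnalyzer())))
        {
            var doc = new Document();
            doc.Add(new NumericDocValuesField("popularity", 42L));                        // NUMERIC
            doc.Add(new BinaryDocValuesField("thumb", new BytesRef(new byte[] { 1, 2 }))); // BINARY
            doc.Add(new SortedDocValuesField("category", new BytesRef("books")));         // SORTED
            doc.Add(new SortedSetDocValuesField("tags", new BytesRef("used")));           // SORTED_SET

            writer.AddDocument(doc);
            writer.Commit();      // first segment
            writer.AddDocument(doc);
            writer.Commit();      // second segment
            writer.ForceMerge(1); // merging the two segments exercises the doc values merge path
        }
    }
}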
public abstract void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state);
private void MergeNorms(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = Codec.NormsFormat().NormsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in MergeState.FieldInfos)
        {
            if (field.HasNorms())
            {
                IList<NumericDocValues> toMerge = new List<NumericDocValues>();
                //IList<Bits> docsWithField = new List<Bits>();
                foreach (AtomicReader reader in MergeState.Readers)
                {
                    NumericDocValues norms = reader.GetNormValues(field.Name);
                    if (norms == null)
                    {
                        norms = DocValues.EMPTY_NUMERIC;
                    }
                    toMerge.Add(norms);
                    //docsWithField.Add(new Lucene.Net.Util.Bits_MatchAllBits(reader.MaxDoc));
                }
                consumer.MergeNumericField(field, MergeState, toMerge/*, docsWithField*/);
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(consumer);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(consumer);
        }
    }
}
public override void CloseDocStore(SegmentWriteState state) { consumer.CloseDocStore(state); fieldsWriter.CloseDocStore(state); }
internal override void Flush(IDictionary<string, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state)
{
    bool success = false;
    DocValuesConsumer normsConsumer = null;
    try
    {
        if (state.FieldInfos.HasNorms)
        {
            NormsFormat normsFormat = state.SegmentInfo.Codec.NormsFormat;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(normsFormat != null);
            }
            normsConsumer = normsFormat.NormsConsumer(state);

            foreach (FieldInfo fi in state.FieldInfos)
            {
                NormsConsumerPerField toWrite = (NormsConsumerPerField)fieldsToFlush[fi.Name];
                // we must check the final value of omitNorms for the fieldinfo, it could have
                // changed for this field since the first time we added it.
                if (!fi.OmitsNorms)
                {
                    if (toWrite != null && !toWrite.IsEmpty)
                    {
                        toWrite.Flush(state, normsConsumer);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(fi.NormType == DocValuesType.NUMERIC);
                        }
                    }
                    else if (fi.IsIndexed)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(fi.NormType == DocValuesType.NONE, "got {0}; field={1}", fi.NormType, fi.Name);
                        }
                    }
                }
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(normsConsumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(normsConsumer);
        }
    }
}
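// --------------------------------------------------------------------------
// Hedged illustration (not part of the sources above): the norms flush skips
// fields whose final FieldInfo reports OmitsNorms, which is controlled per
// field at indexing time. Sketch assuming Lucene.NET 4.8; field names are
// illustrative.
using Lucene.Net.Documents;

public static class OmitNormsSketch
{
    // Builds a document with one field that will get norms and one that will not.
    public static Document Build()
    {
        var noNormsType = new FieldType(TextField.TYPE_STORED) { OmitNorms = true };
        var doc = new Document();
        doc.Add(new Field("title", "no norms are written for this field", noNormsType));
        doc.Add(new TextField("body", "norms are written for this field", Field.Store.NO));
        return doc;
    }
}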
public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
{
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> childThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
    foreach (DocFieldProcessorPerThread perThread in threads.Keys)
    {
        childThreadsAndFields[perThread.consumer] = perThread.Fields();
        perThread.TrimFields(state);
    }
    fieldsWriter.Flush(state);
    consumer.Flush(childThreadsAndFields, state);

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary. EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, fileName);
    state.flushedFiles.Add(fileName);
}
public abstract void Flush(System.Collections.ICollection threads, SegmentWriteState state);
private void MergeTerms(SegmentWriteState segmentWriteState) { IList<Fields> fields = new List<Fields>(); IList<ReaderSlice> slices = new List<ReaderSlice>(); int docBase = 0; for (int readerIndex = 0; readerIndex < MergeState.Readers.Count; readerIndex++) { AtomicReader reader = MergeState.Readers[readerIndex]; Fields f = reader.Fields; int maxDoc = reader.MaxDoc; if (f != null) { slices.Add(new ReaderSlice(docBase, maxDoc, readerIndex)); fields.Add(f); } docBase += maxDoc; } FieldsConsumer consumer = Codec.PostingsFormat().FieldsConsumer(segmentWriteState); bool success = false; try { consumer.Merge(MergeState, new MultiFields(fields.ToArray(/*Fields.EMPTY_ARRAY*/), slices.ToArray(/*ReaderSlice.EMPTY_ARRAY*/))); success = true; } finally { if (success) { IOUtils.Close(consumer); } else { IOUtils.CloseWhileHandlingException(consumer); } } }
internal void ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer); int newSize = 1; if (newSize != postingsFreeList.Length) { if (postingsFreeCount > newSize) { if (trackAllocations) { docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting); } postingsFreeCount = newSize; postingsAllocCount = newSize; } RawPostingList[] newArray = new RawPostingList[newSize]; Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount); postingsFreeList = newArray; } }
/// <summary>If there are fields we've seen but did not see again
/// in the last run, then free them up.
/// </summary>
internal void TrimFields(SegmentWriteState state)
{
    for (int i = 0; i < fieldHash.Length; i++)
    {
        DocFieldProcessorPerField perField = fieldHash[i];
        DocFieldProcessorPerField lastPerField = null;

        while (perField != null)
        {
            if (perField.lastGen == -1)
            {
                // This field was not seen since the previous
                // flush, so, free up its resources now

                // Unhash
                if (lastPerField == null)
                    fieldHash[i] = perField.next;
                else
                    lastPerField.next = perField.next;

                if (state.docWriter.infoStream != null)
                    state.docWriter.infoStream.WriteLine(" purge field=" + perField.fieldInfo.name);

                totalFieldCount--;
            }
            else
            {
                // Reset
                perField.lastGen = -1;
                lastPerField = perField;
            }

            perField = perField.next;
        }
    }
}
public abstract void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state);
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all
    // ThreadStates
    var allFields = new List<FreqProxTermsWriterPerField>();

    foreach (var entry in threadsAndFields)
    {
        var fields = entry.Value;
        foreach (var i in fields)
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
            if (perField.termsHashPerField.numPostings > 0)
                allFields.Add(perField);
        }
    }

    // Sort by field name
    allFields.Sort();

    int numAllFields = allFields.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
    Current writer chain:
      FormatPostingsFieldsConsumer
        -> IMPL: FormatPostingsFieldsWriter
          -> FormatPostingsTermsConsumer
            -> IMPL: FormatPostingsTermsWriter
              -> FormatPostingsDocConsumer
                -> IMPL: FormatPostingsDocWriter
                  -> FormatPostingsPositionsConsumer
                    -> IMPL: FormatPostingsPositionsWriter
    */

    int start = 0;
    while (start < numAllFields)
    {
        FieldInfo fieldInfo = allFields[start].fieldInfo;
        System.String fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName))
            end++;

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(fields, consumer);

        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;
            int numPostings = perField.numPostings;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fields[i].Reset();
        }

        start = end;
    }

    foreach (var entry in threadsAndFields)
    {
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
        perThread.termsHashPerThread.Reset(true);
    }

    consumer.Finish();
}
internal override void Flush(IDictionary <string, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) { IDictionary <string, InvertedDocConsumerPerField> childFieldsToFlush = new Dictionary <string, InvertedDocConsumerPerField>(); IDictionary <string, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new Dictionary <string, InvertedDocEndConsumerPerField>(); foreach (KeyValuePair <string, DocFieldConsumerPerField> fieldToFlush in fieldsToFlush) { DocInverterPerField perField = (DocInverterPerField)fieldToFlush.Value; childFieldsToFlush[fieldToFlush.Key] = perField.consumer; endChildFieldsToFlush[fieldToFlush.Key] = perField.endConsumer; } consumer.Flush(childFieldsToFlush, state); endConsumer.Flush(endChildFieldsToFlush, state); }