public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    var oneThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
    var twoThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();

    foreach (var entry in threadsAndFields)
    {
        DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;

        ICollection<DocFieldConsumerPerField> fields = entry.Value;
        IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();

        ICollection<DocFieldConsumerPerField> oneFields = new HashSet<DocFieldConsumerPerField>();
        ICollection<DocFieldConsumerPerField> twoFields = new HashSet<DocFieldConsumerPerField>();
        while (fieldsIt.MoveNext())
        {
            DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current;
            oneFields.Add(perField.one);
            twoFields.Add(perField.two);
        }

        oneThreadsAndFields[perThread.one] = oneFields;
        twoThreadsAndFields[perThread.two] = twoFields;
    }

    one.Flush(oneThreadsAndFields, state);
    two.Flush(twoThreadsAndFields, state);
}
public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    var childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
    var endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>>();

    foreach (var entry in threadsAndFields)
    {
        var perThread = (DocInverterPerThread) entry.Key;

        ICollection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
        ICollection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
        foreach (DocFieldConsumerPerField field in entry.Value)
        {
            var perField = (DocInverterPerField) field;
            childFields.Add(perField.consumer);
            endChildFields.Add(perField.endConsumer);
        }

        childThreadsAndFields[perThread.consumer] = childFields;
        endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
    }

    consumer.Flush(childThreadsAndFields, state);
    endConsumer.Flush(endChildThreadsAndFields, state);
}
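// The fan-out above -- projecting a parent-level threadsAndFields map down onto each child
// consumer's own per-thread/per-field types before delegating Flush -- repeats throughout this
// chain (DocFieldConsumers, DocInverter, TermsHash). A minimal standalone sketch of that pattern;
// the types and helper below are illustrative only and not part of Lucene.Net:
using System;
using System.Collections.Generic;

internal static class FanOutSketch
{
    // Builds the child-level map: each parent per-thread/per-field object exposes the child
    // object it wraps, supplied here via the two selector delegates.
    internal static IDictionary<TChildThread, ICollection<TChildField>> ProjectToChild<TThread, TField, TChildThread, TChildField>(
        IDictionary<TThread, ICollection<TField>> threadsAndFields,
        Func<TThread, TChildThread> childThread,
        Func<TField, TChildField> childField)
    {
        var result = new Dictionary<TChildThread, ICollection<TChildField>>();
        foreach (var entry in threadsAndFields)
        {
            ICollection<TChildField> fields = new HashSet<TChildField>();
            foreach (var field in entry.Value)
                fields.Add(childField(field));
            result[childThread(entry.Key)] = fields;
        }
        return result;
    }
    // e.g. ProjectToChild(threadsAndFields, t => t.consumer, f => f.consumer) would mirror the
    // DocInverter case above (again, purely illustrative).
}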
public override void CloseDocStore(SegmentWriteState state)
{
    try
    {
        one.CloseDocStore(state);
    }
    finally
    {
        two.CloseDocStore(state);
    }
}
internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent) : base()
{
    this.parent = parent;
    System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
    state.flushedFiles.Add(fileName);

    out_Renamed = parent.parent.dir.CreateOutput(fileName);
    totalNumDocs = parent.parent.totalNumDocs;

    // TODO: abstraction violation
    skipInterval = parent.parent.termsOut.skipInterval;
    skipListWriter = parent.parent.skipListWriter;
    skipListWriter.SetFreqOutput(out_Renamed);

    posWriter = new FormatPostingsPositionsWriter(state, this);
}
internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
{
    this.parent = parent;
    omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
    if (parent.parent.parent.fieldInfos.HasProx())
    {
        // At least one field does not omit TF, so create the
        // prox file
        System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
        state.flushedFiles.Add(fileName);
        out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
        parent.skipListWriter.SetProxOutput(out_Renamed);
    }
    else
        // Every field omits TF so we will write no prox file
        out_Renamed = null;
}
public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos) : base()
{
    dir = state.directory;
    segment = state.segmentName;
    totalNumDocs = state.numDocs;
    this.fieldInfos = fieldInfos;
    termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);

    // TODO: this is a nasty abstraction violation (that we
    // peek down to find freqOut/proxOut) -- we need a
    // better abstraction here whereby these child consumers
    // can provide skip data or not
    skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);

    state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
    state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));

    termsWriter = new FormatPostingsTermsWriter(state, this);
}
public override void Flush(ICollection<DocConsumerPerThread> threads, SegmentWriteState state)
{
    var childThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
    foreach (DocConsumerPerThread thread in threads)
    {
        DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread;
        childThreadsAndFields[perThread.consumer] = perThread.Fields();
        perThread.TrimFields(state);
    }
    fieldsWriter.Flush(state);
    consumer.Flush(childThreadsAndFields, state);

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary.  EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, fileName);
    state.flushedFiles.Add(fileName);
}
public void Flush(SegmentWriteState state)
{
    lock (this)
    {
        if (state.numDocsInStore > 0)
        {
            // It's possible that all documents seen in this segment
            // hit non-aborting exceptions, in which case we will
            // not have yet init'd the FieldsWriter:
            InitFieldsWriter();

            // Fill fdx file to include any final docs that we
            // skipped because they hit non-aborting exceptions
            Fill(state.numDocsInStore - docWriter.DocStoreOffset);
        }

        if (fieldsWriter != null)
            fieldsWriter.Flush();
    }
}
public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        // NOTE: it's possible that all documents seen in this segment
        // hit non-aborting exceptions, in which case we will
        // not have yet init'd the TermVectorsWriter.  This is
        // actually OK (unlike in the stored fields case)
        // because, although FieldInfos.HasVectors() will return
        // true, the TermVectorsReader gracefully handles
        // non-existence of the term vectors files.
        if (tvx != null)
        {
            if (state.numDocsInStore > 0)
                // In case there are some final documents that we
                // didn't see (because they hit a non-aborting exception):
                Fill(state.numDocsInStore - docWriter.DocStoreOffset);

            tvx.Flush();
            tvd.Flush();
            tvf.Flush();
        }

        foreach (var entry in threadsAndFields)
        {
            foreach (var field in entry.Value)
            {
                TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
                perField.termsHashPerField.Reset();
                perField.ShrinkHash();
            }

            TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
            perThread.termsHashPerThread.Reset(true);
        }
    }
}
internal abstract void CloseDocStore(SegmentWriteState state);
public abstract void Flush(IDictionary<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state);
/// <summary>Produce _X.nrm if any document had a field with norms
/// not disabled
/// </summary>
public override void Flush(IDictionary<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    IDictionary<FieldInfo, IList<NormsWriterPerField>> byField = new HashMap<FieldInfo, IList<NormsWriterPerField>>();

    // Typically, each thread will have encountered the same
    // field.  So first we collate by field, ie, all
    // per-thread field instances that correspond to the
    // same FieldInfo
    foreach (var entry in threadsAndFields)
    {
        ICollection<InvertedDocEndConsumerPerField> fields = entry.Value;
        IEnumerator<InvertedDocEndConsumerPerField> fieldsIt = fields.GetEnumerator();
        var fieldsToRemove = new HashSet<NormsWriterPerField>();

        while (fieldsIt.MoveNext())
        {
            NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.Current;

            if (perField.upto > 0)
            {
                // It has some norms
                IList<NormsWriterPerField> l = byField[perField.fieldInfo];
                if (l == null)
                {
                    l = new List<NormsWriterPerField>();
                    byField[perField.fieldInfo] = l;
                }
                l.Add(perField);
            }
            else
            {
                // Remove this field since we haven't seen it
                // since the previous flush
                fieldsToRemove.Add(perField);
            }
        }

        foreach (var field in fieldsToRemove)
        {
            fields.Remove(field);
        }
    }

    System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
    state.flushedFiles.Add(normsFileName);
    IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

    try
    {
        normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

        int numField = fieldInfos.Size();
        int normCount = 0;

        for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

            IList<NormsWriterPerField> toMerge = byField[fieldInfo];
            int upto = 0;
            if (toMerge != null)
            {
                int numFields = toMerge.Count;

                normCount++;

                NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                int[] uptos = new int[numFields];

                for (int j = 0; j < numFields; j++)
                {
                    fields[j] = toMerge[j];
                }

                int numLeft = numFields;

                while (numLeft > 0)
                {
                    System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                    int minLoc = 0;
                    int minDocID = fields[0].docIDs[uptos[0]];

                    for (int j = 1; j < numLeft; j++)
                    {
                        int docID = fields[j].docIDs[uptos[j]];
                        if (docID < minDocID)
                        {
                            minDocID = docID;
                            minLoc = j;
                        }
                    }

                    System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                    // Fill hole
                    for (; upto < minDocID; upto++)
                    {
                        normsOut.WriteByte(defaultNorm);
                    }

                    normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                    (uptos[minLoc])++;
                    upto++;

                    if (uptos[minLoc] == fields[minLoc].upto)
                    {
                        fields[minLoc].Reset();
                        if (minLoc != numLeft - 1)
                        {
                            fields[minLoc] = fields[numLeft - 1];
                            uptos[minLoc] = uptos[numLeft - 1];
                        }
                        numLeft--;
                    }
                }

                // Fill final hole with defaultNorm
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }
            else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
            {
                normCount++;
                // Fill entire field with default norm:
                for (; upto < state.numDocs; upto++)
                {
                    normsOut.WriteByte(defaultNorm);
                }
            }

            System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.FilePointer, ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.FilePointer);
        }
    }
    finally
    {
        normsOut.Close();
    }
}
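// The inner while loop above is a k-way merge: each surviving NormsWriterPerField holds an
// ascending docID list, and the writer repeatedly emits the smallest current head, filling
// any gaps with the default norm. A minimal standalone sketch of that merge-by-minimum pattern;
// the class and arrays below are illustrative only, not Lucene.Net code:
using System.Collections.Generic;

internal static class NormsMergeSketch
{
    // Merges several ascending int lists into one ascending sequence by repeatedly
    // taking the list whose current head is smallest.
    internal static IEnumerable<int> MergeAscending(List<int[]> lists)
    {
        var uptos = new int[lists.Count];
        while (true)
        {
            int minLoc = -1, minDoc = int.MaxValue;
            for (int j = 0; j < lists.Count; j++)
            {
                if (uptos[j] < lists[j].Length && lists[j][uptos[j]] < minDoc)
                {
                    minDoc = lists[j][uptos[j]];
                    minLoc = j;
                }
            }
            if (minLoc == -1)
                yield break;        // every list exhausted
            uptos[minLoc]++;
            yield return minDoc;
        }
    }
    // e.g. MergeAscending(new List<int[]> { new[] { 0, 3 }, new[] { 1, 2 } }) yields 0, 1, 2, 3.
}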
internal override void CloseDocStore(SegmentWriteState state) { }
/// <summary>Called when DocumentsWriter decides to close the doc
/// stores
/// </summary>
public abstract void CloseDocStore(SegmentWriteState state);
internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) : base()
{
    this.parent = parent;
    termsOut = parent.termsOut;
    docsWriter = new FormatPostingsDocsWriter(state, this);
}
/// <summary>Flush a new segment </summary>
internal abstract void Flush(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state);
internal override void Flush(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        var childThreadsAndFields = new Dictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>>();
        Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> nextThreadsAndFields;

        if (nextTermsHash != null)
        {
            nextThreadsAndFields = new Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
        }
        else
            nextThreadsAndFields = null;

        foreach (var entry in threadsAndFields)
        {
            TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;

            ICollection<InvertedDocConsumerPerField> fields = entry.Value;
            var fieldsIt = fields.GetEnumerator();

            ICollection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
            ICollection<InvertedDocConsumerPerField> nextChildFields;

            if (nextTermsHash != null)
            {
                nextChildFields = new HashSet<InvertedDocConsumerPerField>();
            }
            else
                nextChildFields = null;

            while (fieldsIt.MoveNext())
            {
                TermsHashPerField perField = (TermsHashPerField) fieldsIt.Current;
                childFields.Add(perField.consumer);
                if (nextTermsHash != null)
                    nextChildFields.Add(perField.nextPerField);
            }

            childThreadsAndFields[perThread.consumer] = childFields;
            if (nextTermsHash != null)
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
        }

        consumer.Flush(childThreadsAndFields, state);

        ShrinkFreePostings(threadsAndFields, state);

        if (nextTermsHash != null)
            nextTermsHash.Flush(nextThreadsAndFields, state);
    }
}
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        consumer.CloseDocStore(state);
        if (nextTermsHash != null)
            nextTermsHash.CloseDocStore(state);
    }
}
public override void CloseDocStore(SegmentWriteState state)
{
    consumer.CloseDocStore(state);
    endConsumer.CloseDocStore(state);
}
private void InitFlushState(bool onlyDocStore)
{
    lock (this)
    {
        InitSegmentName(onlyDocStore);
        flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.TermIndexInterval);
    }
}
internal override void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        if (tvx != null)
        {
            // At least one doc in this run had term vectors
            // enabled
            Fill(state.numDocsInStore - docWriter.DocStoreOffset);
            tvx.Close();
            tvf.Close();
            tvd.Close();
            tvx = null;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);

            System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
                throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));

            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);

            lastDocID = 0;
        }
    }
}
/// <summary>Close doc stores </summary>
internal abstract void CloseDocStore(SegmentWriteState state);
private void MergeTerms()
{
    SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);

    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    try
    {
        queue = new SegmentMergeQueue(readers.Count);

        MergeTermInfos(consumer);
    }
    finally
    {
        consumer.Finish();
        if (queue != null)
            queue.Dispose();
    }
}
public void CloseDocStore(SegmentWriteState state)
{
    lock (this)
    {
        int inc = state.numDocsInStore - lastDocID;
        if (inc > 0)
        {
            InitFieldsWriter();
            Fill(state.numDocsInStore - docWriter.DocStoreOffset);
        }

        if (fieldsWriter != null)
        {
            fieldsWriter.Dispose();
            fieldsWriter = null;
            lastDocID = 0;
            System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);

            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
            state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

            state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
            state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

            System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
            if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
                throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
        }
    }
}
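// The size check above, and the matching one in TermVectorsTermsWriter.CloseDocStore earlier in
// this listing, compare the index file's on-disk length against a 4-byte header plus a fixed
// per-document cost: 8 bytes per doc for the .fdx file and 16 for .tvx (presumably one and two
// 8-byte pointers per document, respectively). A small worked example of that arithmetic; the
// helper class below is illustrative only, not a Lucene.Net API:
internal static class DocStoreSizeCheckSketch
{
    internal static long ExpectedFdxLength(int numDocsInStore) => 4 + (long) numDocsInStore * 8;
    internal static long ExpectedTvxLength(int numDocsInStore) => 4 + (long) numDocsInStore * 16;

    // e.g. ExpectedFdxLength(1000) == 8004 bytes and ExpectedTvxLength(1000) == 16004 bytes;
    // any other length triggers the "size mismatch" exception above.
}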
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...

public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all
    // ThreadStates
    var allFields = new List<FreqProxTermsWriterPerField>();

    foreach (var entry in threadsAndFields)
    {
        var fields = entry.Value;

        foreach (var i in fields)
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) i;
            if (perField.termsHashPerField.numPostings > 0)
                allFields.Add(perField);
        }
    }

    // Sort by field name
    allFields.Sort();
    int numAllFields = allFields.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
    Current writer chain:
        FormatPostingsFieldsConsumer
        -> IMPL: FormatPostingsFieldsWriter
            -> FormatPostingsTermsConsumer
                -> IMPL: FormatPostingsTermsWriter
                    -> FormatPostingsDocConsumer
                        -> IMPL: FormatPostingsDocWriter
                            -> FormatPostingsPositionsConsumer
                                -> IMPL: FormatPostingsPositionsWriter
    */

    int start = 0;
    while (start < numAllFields)
    {
        FieldInfo fieldInfo = allFields[start].fieldInfo;
        System.String fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName))
            end++;

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        AppendPostings(fields, consumer);

        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;

            int numPostings = perField.numPostings;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fields[i].Reset();
        }

        start = end;
    }

    foreach (var entry in threadsAndFields)
    {
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key;
        perThread.termsHashPerThread.Reset(true);
    }

    consumer.Finish();
}
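// The start/end loop above walks the name-sorted allFields list and hands every run of entries
// that share a field name to AppendPostings as one group. A minimal standalone sketch of that
// run-grouping pattern; the helper and types below are illustrative only, not Lucene.Net code:
using System;
using System.Collections.Generic;

internal static class GroupRunsSketch
{
    // Yields [start, end) ranges over a pre-sorted list such that every item in a range
    // maps to the same key.
    internal static IEnumerable<(int Start, int End)> Runs<T, TKey>(IList<T> sorted, Func<T, TKey> key)
        where TKey : IEquatable<TKey>
    {
        int start = 0;
        while (start < sorted.Count)
        {
            TKey k = key(sorted[start]);
            int end = start + 1;
            while (end < sorted.Count && key(sorted[end]).Equals(k))
                end++;
            yield return (start, end);
            start = end;
        }
    }
    // e.g. Runs(new[] { "a", "a", "b" }, s => s) yields (0, 2) and (2, 3).
}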
/// <summary>If there are fields we've seen but did not see again
/// in the last run, then free them up.
/// </summary>
internal void TrimFields(SegmentWriteState state)
{
    for (int i = 0; i < fieldHash.Length; i++)
    {
        DocFieldProcessorPerField perField = fieldHash[i];
        DocFieldProcessorPerField lastPerField = null;

        while (perField != null)
        {
            if (perField.lastGen == -1)
            {
                // This field was not seen since the previous
                // flush, so, free up its resources now

                // Unhash
                if (lastPerField == null)
                    fieldHash[i] = perField.next;
                else
                    lastPerField.next = perField.next;

                if (state.docWriter.infoStream != null)
                    state.docWriter.infoStream.WriteLine("  purge field=" + perField.fieldInfo.name);

                totalFieldCount--;
            }
            else
            {
                // Reset
                perField.lastGen = -1;
                lastPerField = perField;
            }

            perField = perField.next;
        }
    }
}
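// TrimFields unlinks stale entries from each hash bucket's singly linked chain by carrying a
// trailing "previous" pointer that only advances over surviving nodes. A minimal standalone
// sketch of that unlink pattern; the Node type below is illustrative only, not Lucene.Net code:
internal sealed class Node
{
    internal bool Stale;
    internal Node Next;
}

internal static class BucketTrimSketch
{
    // Removes every stale node from the chain rooted at head and returns the new head.
    internal static Node RemoveStale(Node head)
    {
        Node prev = null;
        for (Node cur = head; cur != null; cur = cur.Next)
        {
            if (cur.Stale)
            {
                if (prev == null)
                    head = cur.Next;          // unlink the current head
                else
                    prev.Next = cur.Next;     // bypass cur
            }
            else
            {
                prev = cur;                   // prev only advances over kept nodes
            }
        }
        return head;
    }
}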
internal void ShrinkFreePostings(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = 1;
    if (newSize != postingsFreeList.Length)
    {
        if (postingsFreeCount > newSize)
        {
            if (trackAllocations)
            {
                docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
            }
            postingsFreeCount = newSize;
            postingsAllocCount = newSize;
        }

        RawPostingList[] newArray = new RawPostingList[newSize];
        Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
public override void CloseDocStore(SegmentWriteState state)
{
    consumer.CloseDocStore(state);
    fieldsWriter.CloseDocStore(state);
}
/// <summary>Called when DocumentsWriter decides to create a new
/// segment
/// </summary>
public abstract void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state);
public abstract void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state);