internal bool HasPayloads; // if enabled, and we actually saw any for this field

public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo)
{
    this.TermsHashPerField = termsHashPerField;
    this.TermsWriter = termsWriter;
    this.FieldInfo = fieldInfo;
    DocState = termsHashPerField.DocState;
    FieldState = termsHashPerField.FieldState;
}
public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo)
{
    this.termsHashPerField = termsHashPerField;
    this.perThread = perThread;
    this.termsWriter = perThread.termsWriter;
    this.fieldInfo = fieldInfo;
    docState = termsHashPerField.docState;
    fieldState = termsHashPerField.fieldState;
}
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
{
    this.termsHashPerField = termsHashPerField;
    this.perThread = perThread;
    this.fieldInfo = fieldInfo;
    docState = termsHashPerField.docState;
    fieldState = termsHashPerField.fieldState;
    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
}
public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHashPerThread perThread, TermsHashPerThread nextPerThread, FieldInfo fieldInfo)
{
    this.perThread = perThread;
    intPool = perThread.intPool;
    charPool = perThread.charPool;
    bytePool = perThread.bytePool;
    docState = perThread.docState;
    fieldState = docInverterPerField.fieldState;
    this.consumer = perThread.consumer.addField(this, fieldInfo);
    streamCount = consumer.getStreamCount();
    numPostingInt = 2 * streamCount;
    this.fieldInfo = fieldInfo;
    if (nextPerThread != null)
        nextPerField = (TermsHashPerField)nextPerThread.addField(docInverterPerField, fieldInfo);
    else
        nextPerField = null;
}
public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
{
    IntPool = termsHash.IntPool;
    BytePool = termsHash.BytePool;
    TermBytePool = termsHash.TermBytePool;
    DocState = termsHash.DocState;
    this.TermsHash = termsHash;
    BytesUsed = termsHash.BytesUsed;
    FieldState = docInverterPerField.FieldState;
    this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
    BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
    StreamCount = Consumer.StreamCount;
    NumPostingInt = 2 * StreamCount;
    this.FieldInfo = fieldInfo;
    if (nextTermsHash != null)
    {
        NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
    }
    else
    {
        NextPerField = null;
    }
}
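Both TermsHashPerField constructors follow the same pattern: ask this hash's consumer for a per-field writer, then recurse into the optional secondary hash (nextTermsHash / nextPerThread) so it indexes the same field. A minimal stand-alone sketch of that chaining, using simplified hypothetical types rather than the Lucene.NET classes:

using System;

// Hypothetical simplified types, not the Lucene.NET API.
class SketchTermsHash
{
    private readonly Func<string, string> consumerFactory; // builds a per-field consumer name
    private readonly SketchTermsHash nextTermsHash;        // optional secondary hash (e.g. term vectors)

    public SketchTermsHash(Func<string, string> consumerFactory, SketchTermsHash nextTermsHash = null)
    {
        this.consumerFactory = consumerFactory;
        this.nextTermsHash = nextTermsHash;
    }

    // Mirrors the constructors above: create this hash's consumer for the field,
    // then recurse so the secondary hash sees the same field.
    public SketchPerField AddField(string fieldName)
    {
        string consumer = consumerFactory(fieldName);
        SketchPerField next = nextTermsHash != null ? nextTermsHash.AddField(fieldName) : null;
        return new SketchPerField(consumer, next);
    }
}

class SketchPerField
{
    public string Consumer { get; }
    public SketchPerField NextPerField { get; }

    public SketchPerField(string consumer, SketchPerField nextPerField)
    {
        Consumer = consumer;
        NextPerField = nextPerField;
    }
}

class SketchDemo
{
    static void Main()
    {
        var vectors = new SketchTermsHash(f => "TermVectorsPerField(" + f + ")");
        var postings = new SketchTermsHash(f => "FreqProxPerField(" + f + ")", vectors);

        SketchPerField chain = postings.AddField("body");
        Console.WriteLine(chain.Consumer + " -> " + chain.NextPerField.Consumer);
        // Prints: FreqProxPerField(body) -> TermVectorsPerField(body)
    }
}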
internal override void Flush(IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state, IState s)
{
    lock (this)
    {
        var childThreadsAndFields = new Dictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>>();
        Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> nextThreadsAndFields;
        if (nextTermsHash != null)
        {
            nextThreadsAndFields = new Dictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
        }
        else
        {
            nextThreadsAndFields = null;
        }

        foreach (var entry in threadsAndFields)
        {
            TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;
            ICollection<InvertedDocConsumerPerField> fields = entry.Value;
            var fieldsIt = fields.GetEnumerator();

            ICollection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
            ICollection<InvertedDocConsumerPerField> nextChildFields;
            if (nextTermsHash != null)
            {
                nextChildFields = new HashSet<InvertedDocConsumerPerField>();
            }
            else
            {
                nextChildFields = null;
            }

            while (fieldsIt.MoveNext())
            {
                TermsHashPerField perField = (TermsHashPerField)fieldsIt.Current;
                childFields.Add(perField.consumer);
                if (nextTermsHash != null)
                {
                    nextChildFields.Add(perField.nextPerField);
                }
            }

            childThreadsAndFields[perThread.consumer] = childFields;
            if (nextTermsHash != null)
            {
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
            }
        }

        consumer.Flush(childThreadsAndFields, state, s);
        ShrinkFreePostings(threadsAndFields, state);
        if (nextTermsHash != null)
        {
            nextTermsHash.Flush(nextThreadsAndFields, state, s);
        }
    }
}
public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
{
    return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
}
internal PostingsBytesStartArray(TermsHashPerField perField, Counter bytesUsed)
{
    this.PerField = perField;
    this.BytesUsed_Renamed = bytesUsed;
}
public override void Flush(IDictionary<string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all ThreadStates
    IList<FreqProxTermsWriterPerField> allFields = new JCG.List<FreqProxTermsWriterPerField>();

    foreach (TermsHashConsumerPerField f in fieldsToFlush.Values)
    {
        FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)f;
        if (perField.termsHashPerField.bytesHash.Count > 0)
        {
            allFields.Add(perField);
        }
    }

    int numAllFields = allFields.Count;

    // Sort by field name
    CollectionUtil.IntroSort(allFields);

    FieldsConsumer consumer = state.SegmentInfo.Codec.PostingsFormat.FieldsConsumer(state);

    bool success = false;
    try
    {
        TermsHash termsHash = null;

        /*
         * Current writer chain:
         * FieldsConsumer
         *   -> IMPL: FormatPostingsTermsDictWriter
         *     -> TermsConsumer
         *       -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
         *         -> DocsConsumer
         *           -> IMPL: FormatPostingsDocsWriter
         *             -> PositionsConsumer
         *               -> IMPL: FormatPostingsPositionsWriter
         */

        for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++)
        {
            FieldInfo fieldInfo = allFields[fieldNumber].fieldInfo;
            FreqProxTermsWriterPerField fieldWriter = allFields[fieldNumber];

            // If this field has postings then add them to the segment
            fieldWriter.Flush(fieldInfo.Name, consumer, state);

            TermsHashPerField perField = fieldWriter.termsHashPerField;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termsHash == null || termsHash == perField.termsHash);
            }
            termsHash = perField.termsHash;
            int numPostings = perField.bytesHash.Count;
            perField.Reset();
            perField.ShrinkHash(/* numPostings // LUCENENET: Not used */);
            fieldWriter.Reset();
        }

        if (termsHash != null)
        {
            termsHash.Reset();
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
internal override void flush(IDictionary<object, object> threadsAndFields, DocumentsWriter.FlushState state)
{
    // Gather all FieldData's that have postings, across all ThreadStates
    List<object> allFields = new List<object>();

    IEnumerator<KeyValuePair<object, object>> it = threadsAndFields.GetEnumerator();
    while (it.MoveNext())
    {
        KeyValuePair<object, object> entry = (KeyValuePair<object, object>)it.Current;
        ICollection<object> fields = (ICollection<object>)entry.Value;
        IEnumerator<object> fieldsIt = fields.GetEnumerator();
        while (fieldsIt.MoveNext())
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)fieldsIt.Current;
            if (perField.termsHashPerField.numPostings > 0)
            {
                allFields.Add(perField);
            }
        }
    }

    // Sort by field name
    allFields.Sort();
    int numAllFields = allFields.Count;

    TermInfosWriter termsOut = new TermInfosWriter(state.directory, state.segmentName, fieldInfos, state.docWriter.writer.GetTermIndexInterval());

    IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));
    IndexOutput proxOut;
    if (fieldInfos.HasProx())
    {
        proxOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION));
    }
    else
    {
        proxOut = null;
    }

    DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, state.numDocsInRAM, freqOut, proxOut);

    int start = 0;
    while (start < numAllFields)
    {
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
        string fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
        {
            end++;
        }

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the segment
        AppendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter);

        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;
            int numPostings = perField.numPostings;
            perField.reset();
            perField.shrinkHash(numPostings);
            fields[i].reset();
        }

        start = end;
    }

    it = threadsAndFields.GetEnumerator();
    while (it.MoveNext())
    {
        KeyValuePair<object, object> entry = (KeyValuePair<object, object>)it.Current;
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
        perThread.termsHashPerThread.reset(true);
    }

    freqOut.Close();
    if (proxOut != null)
    {
        state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
        proxOut.Close();
    }
    termsOut.Close();

    // Record all files we have flushed
    state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)] = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
    state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
}
public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
{
    return new TermVectorsConsumerPerField(termsHashPerField, this, fieldInfo);
}
internal override void flush(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
{
    lock (this)
    {
        IDictionary<object, object> childThreadsAndFields = new Dictionary<object, object>();
        IDictionary<object, ICollection<object>> nextThreadsAndFields;
        if (nextTermsHash != null)
        {
            nextThreadsAndFields = new Dictionary<object, ICollection<object>>();
        }
        else
        {
            nextThreadsAndFields = null;
        }

        IEnumerator<KeyValuePair<object, ICollection<object>>> it = threadsAndFields.GetEnumerator();
        while (it.MoveNext())
        {
            KeyValuePair<object, ICollection<object>> entry = it.Current;
            TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;
            ICollection<object> fields = entry.Value;
            IEnumerator<object> fieldsIt = fields.GetEnumerator();

            IDictionary<object, object> childFields = new Dictionary<object, object>();
            IDictionary<object, object> nextChildFields;
            if (nextTermsHash != null)
            {
                nextChildFields = new Dictionary<object, object>();
            }
            else
            {
                nextChildFields = null;
            }

            while (fieldsIt.MoveNext())
            {
                TermsHashPerField perField = (TermsHashPerField)fieldsIt.Current;
                childFields[perField.consumer] = perField.consumer;
                if (nextTermsHash != null)
                {
                    nextChildFields[perField.nextPerField] = perField.nextPerField;
                }
            }

            childThreadsAndFields[perThread.consumer] = childFields.Keys;
            if (nextTermsHash != null)
            {
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields.Keys;
            }
        }

        consumer.flush(childThreadsAndFields, state);
        shrinkFreePostings(threadsAndFields, state);
        if (nextTermsHash != null)
        {
            nextTermsHash.flush(nextThreadsAndFields, state);
        }
    }
}
internal override void Flush(Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> childThreadsAndFields = new Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>>();
        Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> nextThreadsAndFields;
        if (nextTermsHash != null)
        {
            nextThreadsAndFields = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
        }
        else
        {
            nextThreadsAndFields = null;
        }

        foreach (KeyValuePair<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> entry in threadsAndFields)
        {
            TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;
            IList<InvertedDocConsumerPerField> fields = entry.Value;
            IEnumerator<InvertedDocConsumerPerField> fieldsIt = fields.GetEnumerator();

            List<TermsHashConsumerPerField> childFields = new List<TermsHashConsumerPerField>();
            List<InvertedDocConsumerPerField> nextChildFields;
            if (nextTermsHash != null)
            {
                nextChildFields = new List<InvertedDocConsumerPerField>();
            }
            else
            {
                nextChildFields = null;
            }

            while (fieldsIt.MoveNext())
            {
                TermsHashPerField perField = (TermsHashPerField)fieldsIt.Current;
                childFields.Add(perField.consumer);
                if (nextTermsHash != null)
                {
                    nextChildFields.Add(perField.nextPerField);
                }
            }

            childThreadsAndFields[perThread.consumer] = childFields;
            if (nextTermsHash != null)
            {
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
            }
        }

        consumer.Flush(childThreadsAndFields, state);
        ShrinkFreePostings(threadsAndFields, state);
        if (nextTermsHash != null)
        {
            nextTermsHash.Flush(nextThreadsAndFields, state);
        }
    }
}
public PostingComparer(TermsHashPerField parent)
{
    this._parent = parent;
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
public override void Flush(Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all ThreadStates
    List<FreqProxTermsWriterPerField> allFields = new List<FreqProxTermsWriterPerField>();

    foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> entry in threadsAndFields)
    {
        IList<TermsHashConsumerPerField> fields = entry.Value;
        foreach (TermsHashConsumerPerField i in fields)
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
            if (perField.termsHashPerField.numPostings > 0)
            {
                allFields.Add(perField);
            }
        }
    }

    // Sort by field name
    allFields.Sort();
    int numAllFields = allFields.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
     * Current writer chain:
     * FormatPostingsFieldsConsumer
     *   -> IMPL: FormatPostingsFieldsWriter
     *     -> FormatPostingsTermsConsumer
     *       -> IMPL: FormatPostingsTermsWriter
     *         -> FormatPostingsDocConsumer
     *           -> IMPL: FormatPostingsDocWriter
     *             -> FormatPostingsPositionsConsumer
     *               -> IMPL: FormatPostingsPositionsWriter
     */

    int start = 0;
    while (start < numAllFields)
    {
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
        System.String fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
        {
            end++;
        }

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the segment
        AppendPostings(fields, consumer);

        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;
            int numPostings = perField.numPostings;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fields[i].Reset();
        }

        start = end;
    }

    foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> entry in threadsAndFields)
    {
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
        perThread.termsHashPerThread.Reset(true);
    }

    consumer.Finish();
}
internal override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
        System.Collections.IDictionary nextThreadsAndFields;
        if (nextTermsHash != null)
        {
            nextThreadsAndFields = new System.Collections.Hashtable();
        }
        else
        {
            nextThreadsAndFields = null;
        }

        System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
        while (it.MoveNext())
        {
            System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;
            TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;
            System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value;
            System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();

            System.Collections.Hashtable childFields = new System.Collections.Hashtable();
            System.Collections.Hashtable nextChildFields;
            if (nextTermsHash != null)
            {
                nextChildFields = new System.Collections.Hashtable();
            }
            else
            {
                nextChildFields = null;
            }

            while (fieldsIt.MoveNext())
            {
                TermsHashPerField perField = (TermsHashPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;
                childFields[perField.consumer] = perField.consumer;
                if (nextTermsHash != null)
                {
                    nextChildFields[perField.nextPerField] = perField.nextPerField;
                }
            }

            childThreadsAndFields[perThread.consumer] = childFields;
            if (nextTermsHash != null)
            {
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
            }
        }

        consumer.Flush(childThreadsAndFields, state);
        ShrinkFreePostings(threadsAndFields, state);
        if (nextTermsHash != null)
        {
            nextTermsHash.Flush(nextThreadsAndFields, state);
        }
    }
}
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    // Gather all FieldData's that have postings, across all ThreadStates
    System.Collections.ArrayList allFields = new System.Collections.ArrayList();

    System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
    while (it.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;
        System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value;
        System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();
        while (fieldsIt.MoveNext())
        {
            FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;
            if (perField.termsHashPerField.numPostings > 0)
            {
                allFields.Add(perField);
            }
        }
    }

    // Sort by field name
    allFields.Sort();
    int numAllFields = allFields.Count;

    // TODO: allow Lucene user to customize this consumer:
    FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

    /*
     * Current writer chain:
     * FormatPostingsFieldsConsumer
     *   -> IMPL: FormatPostingsFieldsWriter
     *     -> FormatPostingsTermsConsumer
     *       -> IMPL: FormatPostingsTermsWriter
     *         -> FormatPostingsDocConsumer
     *           -> IMPL: FormatPostingsDocWriter
     *             -> FormatPostingsPositionsConsumer
     *               -> IMPL: FormatPostingsPositionsWriter
     */

    int start = 0;
    while (start < numAllFields)
    {
        FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
        System.String fieldName = fieldInfo.name;

        int end = start + 1;
        while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
        {
            end++;
        }

        FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
        for (int i = start; i < end; i++)
        {
            fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

            // Aggregate the storePayload as seen by the same
            // field across multiple threads
            fieldInfo.storePayloads |= fields[i - start].hasPayloads;
        }

        // If this field has postings then add them to the segment
        AppendPostings(fields, consumer);

        for (int i = 0; i < fields.Length; i++)
        {
            TermsHashPerField perField = fields[i].termsHashPerField;
            int numPostings = perField.numPostings;
            perField.Reset();
            perField.ShrinkHash(numPostings);
            fields[i].Reset();
        }

        start = end;
    }

    it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
    while (it.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;
        FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
        perThread.termsHashPerThread.Reset(true);
    }

    consumer.Finish();
}
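The Flush variants above all share the same grouping step: sort the per-field writers by field name, then walk the sorted list and batch consecutive entries with the same name so one field's postings from every thread are appended together. A minimal stand-alone sketch of that start/end loop, using a hypothetical SketchField type rather than the Lucene.NET classes:

using System;
using System.Collections.Generic;

// Hypothetical stand-in for FreqProxTermsWriterPerField: sortable by field name.
class SketchField : IComparable<SketchField>
{
    public string Name;
    public string Thread;
    public int CompareTo(SketchField other) => string.CompareOrdinal(Name, other.Name);
}

class GroupByFieldDemo
{
    static void Main()
    {
        var allFields = new List<SketchField>
        {
            new SketchField { Name = "title", Thread = "t1" },
            new SketchField { Name = "body",  Thread = "t0" },
            new SketchField { Name = "body",  Thread = "t1" },
        };

        // Sort by field name, as the Flush implementations above do.
        allFields.Sort();

        int numAllFields = allFields.Count;
        int start = 0;
        while (start < numAllFields)
        {
            string fieldName = allFields[start].Name;

            // Advance 'end' past every consecutive entry for the same field.
            int end = start + 1;
            while (end < numAllFields && allFields[end].Name == fieldName)
            {
                end++;
            }

            // start..end now covers one field across all threads; append its
            // postings as a single batch (here we just report the group).
            Console.WriteLine(fieldName + ": " + (end - start) + " per-thread writer(s)");

            start = end;
        }
        // Output:
        // body: 2 per-thread writer(s)
        // title: 1 per-thread writer(s)
    }
}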
abstract public TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
{
    return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo);
}
internal PostingsBytesStartArray(TermsHashPerField perField, Counter bytesUsed)
{
    this.perField = perField;
    this.bytesUsed = bytesUsed;
}
public abstract TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);