public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.termsHashPerField = termsHashPerField;
     this.perThread = perThread;
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
     omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
 }
 public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
 {
     this.termsHashPerField = termsHashPerField;
     this.perThread         = perThread;
     this.fieldInfo         = fieldInfo;
     docState   = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
     omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
 }
Пример #3
0
        // TODO: would be nice to factor out more of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share alot of this...

        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            System.Collections.ArrayList allFields = new System.Collections.ArrayList();

            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value;

                System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();

                while (fieldsIt.MoveNext())
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            // TODO: allow Lucene user to customize this consumer:
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

            /*
             * Current writer chain:
             * FormatPostingsFieldsConsumer
             * -> IMPL: FormatPostingsFieldsWriter
             * -> FormatPostingsTermsConsumer
             * -> IMPL: FormatPostingsTermsWriter
             * -> FormatPostingsDocConsumer
             * -> IMPL: FormatPostingsDocWriter
             * -> FormatPostingsPositionsConsumer
             * -> IMPL: FormatPostingsPositionsWriter
             */

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo     fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                System.String fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(fields, consumer);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.Reset();
                    perField.ShrinkHash(numPostings);
                    fields[i].Reset();
                }

                start = end;
            }

            it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry     = (System.Collections.DictionaryEntry)it.Current;
                FreqProxTermsWriterPerThread       perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.Reset(true);
            }

            consumer.Finish();
        }
Пример #4
0
        // TODO: would be nice to factor out more of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share alot of this...

        public override void Flush(Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <FreqProxTermsWriterPerField> allFields = new List <FreqProxTermsWriterPerField>();

            foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
            {
                IList <TermsHashConsumerPerField> fields = entry.Value;
                foreach (TermsHashConsumerPerField i in fields)
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            // TODO: allow Lucene user to customize this consumer:
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

            /*
             * Current writer chain:
             * FormatPostingsFieldsConsumer
             * -> IMPL: FormatPostingsFieldsWriter
             * -> FormatPostingsTermsConsumer
             * -> IMPL: FormatPostingsTermsWriter
             * -> FormatPostingsDocConsumer
             * -> IMPL: FormatPostingsDocWriter
             * -> FormatPostingsPositionsConsumer
             * -> IMPL: FormatPostingsPositionsWriter
             */

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo     fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                System.String fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(fields, consumer);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.Reset();
                    perField.ShrinkHash(numPostings);
                    fields[i].Reset();
                }

                start = end;
            }

            foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
            {
                FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.Reset(true);
            }

            consumer.Finish();
        }
        // TODO: would be nice to factor out morme of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share alot of this...

        internal override void flush(IDictionary <object, object> threadsAndFields, DocumentsWriter.FlushState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <object> allFields = new List <object>();

            IEnumerator <KeyValuePair <object, object> > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, object> entry = (KeyValuePair <object, object>)it.Current;

                ICollection <object> fields = (ICollection <object>)entry.Value;

                IEnumerator <object> fieldsIt = fields.GetEnumerator();

                while (fieldsIt.MoveNext())
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)fieldsIt.Current;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            TermInfosWriter termsOut = new TermInfosWriter(state.directory,
                                                           state.segmentName,
                                                           fieldInfos,
                                                           state.docWriter.writer.GetTermIndexInterval());

            IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));
            IndexOutput proxOut;

            if (fieldInfos.HasProx())
            {
                proxOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION));
            }
            else
            {
                proxOut = null;
            }

            DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
                                                                             termsOut.maxSkipLevels,
                                                                             state.numDocsInRAM, freqOut, proxOut);

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                string    fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.reset();
                    perField.shrinkHash(numPostings);
                    fields[i].reset();
                }

                start = end;
            }

            it = threadsAndFields.GetEnumerator();
            while (it.MoveNext())
            {
                KeyValuePair <object, object> entry     = (KeyValuePair <object, object>)it.Current;
                FreqProxTermsWriterPerThread  perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.reset(true);
            }

            freqOut.Close();
            if (proxOut != null)
            {
                state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
                proxOut.Close();
            }
            termsOut.Close();

            // Record all files we have flushed
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)]        = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)]       = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
        }