/// <summary>
/// Builds the merge state for one per-field writer: remembers the writer,
/// the char pool of its owning thread, the number of buffered postings and
/// the postings array returned by <c>SortPostings()</c>.
/// </summary>
/// <param name="field">Per-field writer whose postings will be walked.</param>
public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
		{
			this.field = field;

			// Term text is read from the char pool owned by the field's thread.
			charPool = field.perThread.termsHashPerThread.charPool;

			// The posting count is captured before asking the hash to sort,
			// in the same order as the assignments were originally made.
			TermsHashPerField hashForField = field.termsHashPerField;
			numPostings = hashForField.numPostings;
			postings = hashForField.SortPostings();
		}
Esempio n. 2
0
 /// <summary>
 /// Initializes the merge state from a per-field writer: stores the writer,
 /// its thread's char pool, its posting count and its sorted postings.
 /// </summary>
 /// <param name="field">Per-field writer to read postings from.</param>
 public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
 {
     this.field = field;
     charPool = field.perThread.termsHashPerThread.charPool;

     // Count first, then sort: same evaluation order as before.
     TermsHashPerField perFieldHash = field.termsHashPerField;
     numPostings = perFieldHash.numPostings;
     postings = perFieldHash.SortPostings();
 }
		// TODO: would be nice to factor out more of this, e.g. the
		// FreqProxFieldMergeState, and code to visit all Fields
		// under the same FieldInfo together, up into TermsHash*.
		// Other writers would presumably share a lot of this...
        /// <summary>
        /// Flushes the buffered postings of all per-field writers through a
        /// <c>FormatPostingsFieldsWriter</c>, then resets the per-field and
        /// per-thread state.
        /// </summary>
        /// <param name="threadsAndFields">Maps each per-thread consumer to the per-field consumers it owns.</param>
        /// <param name="state">Segment write state handed to the postings writer.</param>
        public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{
			// Collect, across every thread, the per-field writers that
			// actually buffered at least one posting.
			var pending = new List<FreqProxTermsWriterPerField>();
			foreach (var threadEntry in threadsAndFields)
			{
				foreach (var fieldConsumer in threadEntry.Value)
				{
					var candidate = (FreqProxTermsWriterPerField) fieldConsumer;
					if (candidate.termsHashPerField.numPostings > 0)
					{
						pending.Add(candidate);
					}
				}
			}

			// Sort() uses the element type's own ordering; the grouping loop
			// below relies on writers of the same field name ending up adjacent.
			pending.Sort();
			int pendingCount = pending.Count;

			// TODO: allow Lucene user to customize this consumer:
			// Writer chain: fields writer -> terms writer -> doc writer -> positions writer.
			FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

			int groupStart = 0;
			while (groupStart < pendingCount)
			{
				FieldInfo groupInfo = pending[groupStart].fieldInfo;
				string groupName = groupInfo.name;

				// Extend the group over every following writer with the same field name.
				int groupEnd = groupStart + 1;
				while (groupEnd < pendingCount && pending[groupEnd].fieldInfo.name.Equals(groupName))
				{
					groupEnd++;
				}

				var group = new FreqProxTermsWriterPerField[groupEnd - groupStart];
				for (int slot = 0; slot < group.Length; slot++)
				{
					FreqProxTermsWriterPerField member = pending[groupStart + slot];
					group[slot] = member;

					// The field stores payloads if any thread saw a payload for it.
					groupInfo.storePayloads |= member.hasPayloads;
				}

				// Write this field's postings into the segment.
				AppendPostings(group, consumer);

				// Release the per-field buffers now that they are written.
				foreach (FreqProxTermsWriterPerField flushed in group)
				{
					TermsHashPerField hash = flushed.termsHashPerField;
					int postingCount = hash.numPostings;
					hash.Reset();
					hash.ShrinkHash(postingCount);
					flushed.Reset();
				}

				groupStart = groupEnd;
			}

			foreach (var threadEntry in threadsAndFields)
			{
				var threadWriter = (FreqProxTermsWriterPerThread) threadEntry.Key;
				threadWriter.termsHashPerThread.Reset(true);
			}

			consumer.Finish();
		}
		/* Walk through all unique text tokens (Posting
		* instances) found in this field and serialize them
		* through the given fields consumer.
		*
		* `fields` holds the per-thread writers of ONE field (the assert
		* below checks they share fields[0].fieldInfo).  Each writer gets a
		* FreqProxFieldMergeState; the states are merged term by term
		* (smallest term first, per compareText) and, within a term, doc by
		* doc (smallest docID first).
		*
		* Uses the member `payloadBuffer` (declared outside this method) as
		* a reusable scratch buffer for payload bytes. */
		internal void  AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer)
		{
			
			int numFields = fields.Length;
			
			// One merge cursor per per-thread writer of this field.
			FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];
			
			for (int i = 0; i < numFields; i++)
			{
				FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);
				
				System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields [0].fieldInfo);
				
				// Should always be true
				// NOTE: NextTerm() must run even when asserts are compiled out,
				// which is why its result is stored in a local first.
				bool result = fms.NextTerm();
				System.Diagnostics.Debug.Assert(result);
			}
			
			FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo);
			
			// Scratch array: the states that are positioned on the current (smallest) term.
			FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
			
			bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
			
			// numFields shrinks as states run out of terms; the live states
			// are always mergeStates[0..numFields).
			while (numFields > 0)
			{
				
				// Get the next term to merge
				termStates[0] = mergeStates[0];
				int numToMerge = 1;
				
				for (int i = 1; i < numFields; i++)
				{
					char[] text = mergeStates[i].text;
					int textOffset = mergeStates[i].textOffset;
					int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);
					
					if (cmp < 0)
					{
						// Strictly smaller term: it becomes the only candidate so far.
						termStates[0] = mergeStates[i];
						numToMerge = 1;
					}
					else if (cmp == 0)
						// Same term as the current minimum: merge it too.
						termStates[numToMerge++] = mergeStates[i];
				}
				
				FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset);
				
				// Now termStates has numToMerge FieldMergeStates
				// which all share the same term.  Now we must
				// interleave the docID streams.
				while (numToMerge > 0)
				{
					
					// Pick the state with the smallest current docID.
					FreqProxFieldMergeState minState = termStates[0];
					for (int i = 1; i < numToMerge; i++)
						if (termStates[i].docID < minState.docID)
							minState = termStates[i];
					
					int termDocFreq = minState.termFreq;
					
					FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq);
					
					ByteSliceReader prox = minState.prox;
					
					// Carefully copy over the prox + payload info,
					// changing the format to match Lucene's segment
					// format.
					if (!currentFieldOmitTermFreqAndPositions)
					{
						// omitTermFreqAndPositions == false so we do write positions &
						// payload
						int position = 0;
						for (int j = 0; j < termDocFreq; j++)
						{
							// Each entry is a VInt: upper bits = position delta,
							// low bit = "a payload follows".
							int code = prox.ReadVInt();
							position += (code >> 1);
							
							int payloadLength;
							if ((code & 1) != 0)
							{
								// This position has a payload
								payloadLength = prox.ReadVInt();
								
								// Grow the shared scratch buffer only when it is too small.
								if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
									payloadBuffer = new byte[payloadLength];
								
								prox.ReadBytes(payloadBuffer, 0, payloadLength);
							}
							else
								payloadLength = 0;
							
							// With payloadLength == 0 the buffer (possibly null or
							// holding stale bytes) is still passed, with length 0.
							posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
						} //End for
						
						posConsumer.Finish();
					}
					
					if (!minState.NextDoc())
					{
						
						// Remove from termStates
						// (in-place compaction that keeps the remaining order)
						int upto = 0;
						for (int i = 0; i < numToMerge; i++)
							if (termStates[i] != minState)
								termStates[upto++] = termStates[i];
						numToMerge--;
						System.Diagnostics.Debug.Assert(upto == numToMerge);
						
						// Advance this state to the next term
						
						if (!minState.NextTerm())
						{
							// OK, no more terms, so remove from mergeStates
							// as well
							upto = 0;
							for (int i = 0; i < numFields; i++)
								if (mergeStates[i] != minState)
									mergeStates[upto++] = mergeStates[i];
							numFields--;
							System.Diagnostics.Debug.Assert(upto == numFields);
						}
					}
				}
				
				// All docs of this term have been written.
				docConsumer.Finish();
			}
			
			// All terms of this field have been written.
			termsConsumer.Finish();
		}
Esempio n. 5
0
        /// <summary>
        /// Flushes every field that has buffered terms through the codec's
        /// <c>FieldsConsumer</c>, resets the per-field state, and disposes the
        /// consumer whether or not the flush succeeded.
        /// </summary>
        /// <param name="fieldsToFlush">Per-field consumers to consider; only their values are used.</param>
        /// <param name="state">Segment write state used to obtain the fields consumer.</param>
        public override void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            // Keep only the per-field writers whose bytes hash is non-empty.
            IList<FreqProxTermsWriterPerField> pending = new List<FreqProxTermsWriterPerField>();
            foreach (TermsHashConsumerPerField candidate in fieldsToFlush.Values)
            {
                FreqProxTermsWriterPerField writer = (FreqProxTermsWriterPerField)candidate;
                if (writer.termsHashPerField.bytesHash.Count > 0)
                {
                    pending.Add(writer);
                }
            }

            int pendingCount = pending.Count;

            // Sort by field name
            CollectionUtil.IntroSort(pending);

            FieldsConsumer consumer = state.SegmentInfo.Codec.PostingsFormat.FieldsConsumer(state);

            bool completed = false;
            try
            {
                // All per-field hashes are expected to share one TermsHash;
                // it is reset once after every field has been flushed.
                TermsHash sharedTermsHash = null;

                for (int index = 0; index < pendingCount; index++)
                {
                    FreqProxTermsWriterPerField writer = pending[index];
                    FieldInfo info = writer.fieldInfo;

                    // Write this field's postings into the segment.
                    writer.Flush(info.Name, consumer, state);

                    TermsHashPerField hash = writer.termsHashPerField;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(sharedTermsHash == null || sharedTermsHash == hash.termsHash);
                    }
                    sharedTermsHash = hash.termsHash;

                    // Remember the size before Reset() so the hash can be shrunk.
                    int termCount = hash.bytesHash.Count;
                    hash.Reset();
                    hash.ShrinkHash(termCount);
                    writer.Reset();
                }

                if (sharedTermsHash != null)
                {
                    sharedTermsHash.Reset();
                }

                completed = true;
            }
            finally
            {
                // On failure, dispose via the variant meant for use while an
                // exception is already being handled.
                if (!completed)
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
                else
                {
                    IOUtils.Dispose(consumer);
                }
            }
        }
		// TODO: would be nice to factor out more of this, e.g. the
		// FreqProxFieldMergeState, and code to visit all Fields
		// under the same FieldInfo together, up into TermsHash*.
		// Other writers would presumably share a lot of this...
		
		/// <summary>
		/// Flushes the buffered postings of all per-field writers through a
		/// <c>FormatPostingsFieldsWriter</c>, then resets the per-field and
		/// per-thread state.
		/// </summary>
		/// <param name="threadsAndFields">Maps each per-thread writer (key) to a collection whose entries carry its per-field writers as keys.</param>
		/// <param name="state">Segment write state handed to the postings writer.</param>
		public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
		{
			// Collect, across every thread, the per-field writers that
			// actually buffered at least one posting.  Enumeration runs over a
			// Hashtable copy of the incoming dictionary.
			System.Collections.ArrayList pending = new System.Collections.ArrayList();
			foreach (System.Collections.DictionaryEntry threadEntry in new System.Collections.Hashtable(threadsAndFields))
			{
				System.Collections.ICollection perThreadFields = (System.Collections.ICollection) threadEntry.Value;
				foreach (System.Collections.DictionaryEntry fieldEntry in perThreadFields)
				{
					FreqProxTermsWriterPerField candidate = (FreqProxTermsWriterPerField) fieldEntry.Key;
					if (candidate.termsHashPerField.numPostings > 0)
					{
						pending.Add(candidate);
					}
				}
			}

			// Sort() uses the elements' own ordering; the grouping loop below
			// relies on writers of the same field name ending up adjacent.
			pending.Sort();
			int pendingCount = pending.Count;

			// TODO: allow Lucene user to customize this consumer:
			// Writer chain: fields writer -> terms writer -> doc writer -> positions writer.
			FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

			int groupStart = 0;
			while (groupStart < pendingCount)
			{
				FieldInfo groupInfo = ((FreqProxTermsWriterPerField) pending[groupStart]).fieldInfo;
				System.String groupName = groupInfo.name;

				// Extend the group over every following writer with the same field name.
				int groupEnd = groupStart + 1;
				while (groupEnd < pendingCount && ((FreqProxTermsWriterPerField) pending[groupEnd]).fieldInfo.name.Equals(groupName))
				{
					groupEnd++;
				}

				FreqProxTermsWriterPerField[] group = new FreqProxTermsWriterPerField[groupEnd - groupStart];
				for (int slot = 0; slot < group.Length; slot++)
				{
					FreqProxTermsWriterPerField member = (FreqProxTermsWriterPerField) pending[groupStart + slot];
					group[slot] = member;

					// The field stores payloads if any thread saw a payload for it.
					groupInfo.storePayloads |= member.hasPayloads;
				}

				// Write this field's postings into the segment.
				AppendPostings(group, consumer);

				// Release the per-field buffers now that they are written.
				foreach (FreqProxTermsWriterPerField flushed in group)
				{
					TermsHashPerField hash = flushed.termsHashPerField;
					int postingCount = hash.numPostings;
					hash.Reset();
					hash.ShrinkHash(postingCount);
					flushed.Reset();
				}

				groupStart = groupEnd;
			}

			foreach (System.Collections.DictionaryEntry threadEntry in new System.Collections.Hashtable(threadsAndFields))
			{
				FreqProxTermsWriterPerThread threadWriter = (FreqProxTermsWriterPerThread) threadEntry.Key;
				threadWriter.termsHashPerThread.Reset(true);
			}

			consumer.Finish();
		}
        // TODO: would be nice to factor out more of this, e.g. the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        /// <summary>
        /// Writes the postings buffered by every per-field writer via a
        /// <c>FormatPostingsFieldsWriter</c> and afterwards resets the
        /// per-field and per-thread buffers.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread writers (keys) mapped to collections whose entries hold per-field writers as keys.</param>
        /// <param name="state">Segment write state passed on to the postings writer.</param>
        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            // Step 1: gather the per-field writers that have postings,
            // enumerating a Hashtable copy of the incoming dictionary.
            System.Collections.ArrayList withPostings = new System.Collections.ArrayList();
            foreach (System.Collections.DictionaryEntry perThread in new System.Collections.Hashtable(threadsAndFields))
            {
                foreach (System.Collections.DictionaryEntry perFieldEntry in (System.Collections.ICollection)perThread.Value)
                {
                    FreqProxTermsWriterPerField writer = (FreqProxTermsWriterPerField)perFieldEntry.Key;
                    if (writer.termsHashPerField.numPostings > 0)
                    {
                        withPostings.Add(writer);
                    }
                }
            }

            // Step 2: order them so that writers of one field are adjacent
            // (Sort() uses the elements' own ordering).
            withPostings.Sort();
            int total = withPostings.Count;

            // TODO: allow Lucene user to customize this consumer:
            // Writer chain: fields writer -> terms writer -> doc writer -> positions writer.
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

            // Step 3: flush one run of same-named writers at a time.
            int runStart = 0;
            while (runStart < total)
            {
                FieldInfo runInfo = ((FreqProxTermsWriterPerField)withPostings[runStart]).fieldInfo;
                System.String runName = runInfo.name;

                int runEnd = runStart + 1;
                for (; runEnd < total; runEnd++)
                {
                    if (!((FreqProxTermsWriterPerField)withPostings[runEnd]).fieldInfo.name.Equals(runName))
                    {
                        break;
                    }
                }

                FreqProxTermsWriterPerField[] run = new FreqProxTermsWriterPerField[runEnd - runStart];
                for (int k = 0; k < run.Length; k++)
                {
                    run[k] = (FreqProxTermsWriterPerField)withPostings[runStart + k];

                    // storePayloads becomes true as soon as one thread saw payloads.
                    runInfo.storePayloads |= run[k].hasPayloads;
                }

                // Write this field's postings into the segment.
                AppendPostings(run, consumer);

                // Free the buffers of the writers just flushed.
                foreach (FreqProxTermsWriterPerField done in run)
                {
                    TermsHashPerField hash = done.termsHashPerField;
                    int count = hash.numPostings;
                    hash.Reset();
                    hash.ShrinkHash(count);
                    done.Reset();
                }

                runStart = runEnd;
            }

            // Step 4: reset every per-thread hash, then finish the consumer.
            foreach (System.Collections.DictionaryEntry perThread in new System.Collections.Hashtable(threadsAndFields))
            {
                ((FreqProxTermsWriterPerThread)perThread.Key).termsHashPerThread.Reset(true);
            }

            consumer.Finish();
        }
        /// <summary>
        /// Orders per-field writers by an ordinal comparison of their field names.
        /// </summary>
        /// <param name="other0">Object to compare with; must be a <c>FreqProxTermsWriterPerField</c> or null.</param>
        /// <returns>
        /// A negative, zero or positive value when this writer's field name sorts
        /// before, equal to or after the other's; a positive value when
        /// <paramref name="other0"/> is null.
        /// </returns>
        /// <exception cref="System.InvalidCastException">
        /// <paramref name="other0"/> is not a <c>FreqProxTermsWriterPerField</c>.
        /// </exception>
        public int CompareTo(System.Object other0)
        {
            // IComparable contract: every instance compares greater than null.
            // Previously a null argument failed with a NullReferenceException.
            if (other0 == null)
            {
                return 1;
            }

            FreqProxTermsWriterPerField other = (FreqProxTermsWriterPerField)other0;

            return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name);
        }
        // TODO: would be nice to factor out more of this, e.g. the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...
        /// <summary>
        /// Writes the buffered postings of all per-field writers directly to
        /// the segment's term dictionary, freq and (when present) prox outputs,
        /// resets the per-field and per-thread buffers, closes the outputs and
        /// records the written file names in <c>state.flushedFiles</c>.
        /// </summary>
        /// <param name="threadsAndFields">Per-thread writers (keys) mapped to collections of their per-field writers (values).</param>
        /// <param name="state">Flush state supplying the directory, segment name and flushed-files map.</param>
        internal override void flush(IDictionary<object, object> threadsAndFields, DocumentsWriter.FlushState state)
        {
            // Collect, across every thread, the per-field writers that
            // actually buffered at least one posting.
            List<object> pending = new List<object>();
            foreach (KeyValuePair<object, object> threadEntry in threadsAndFields)
            {
                foreach (object candidate in (ICollection<object>)threadEntry.Value)
                {
                    FreqProxTermsWriterPerField writer = (FreqProxTermsWriterPerField)candidate;
                    if (writer.termsHashPerField.numPostings > 0)
                    {
                        pending.Add(writer);
                    }
                }
            }

            // Sort() uses the elements' own ordering; the grouping loop below
            // relies on writers of the same field name ending up adjacent.
            pending.Sort();
            int pendingCount = pending.Count;

            TermInfosWriter termsOut = new TermInfosWriter(
                state.directory,
                state.segmentName,
                fieldInfos,
                state.docWriter.writer.GetTermIndexInterval());

            IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));

            // The prox output exists only when fieldInfos reports prox data.
            IndexOutput proxOut = fieldInfos.HasProx()
                ? state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION))
                : null;

            DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(
                termsOut.skipInterval,
                termsOut.maxSkipLevels,
                state.numDocsInRAM,
                freqOut,
                proxOut);

            int groupStart = 0;
            while (groupStart < pendingCount)
            {
                FieldInfo groupInfo = ((FreqProxTermsWriterPerField)pending[groupStart]).fieldInfo;
                string groupName = groupInfo.name;

                // Extend the group over every following writer with the same field name.
                int groupEnd = groupStart + 1;
                while (groupEnd < pendingCount && ((FreqProxTermsWriterPerField)pending[groupEnd]).fieldInfo.name.Equals(groupName))
                {
                    groupEnd++;
                }

                FreqProxTermsWriterPerField[] group = new FreqProxTermsWriterPerField[groupEnd - groupStart];
                for (int slot = 0; slot < group.Length; slot++)
                {
                    FreqProxTermsWriterPerField member = (FreqProxTermsWriterPerField)pending[groupStart + slot];
                    group[slot] = member;

                    // The field stores payloads if any thread saw a payload for it.
                    groupInfo.storePayloads |= member.hasPayloads;
                }

                // Write this field's postings into the segment.
                AppendPostings(state, group, termsOut, freqOut, proxOut, skipListWriter);

                // Release the per-field buffers now that they are written.
                foreach (FreqProxTermsWriterPerField flushed in group)
                {
                    TermsHashPerField hash = flushed.termsHashPerField;
                    int postingCount = hash.numPostings;
                    hash.reset();
                    hash.shrinkHash(postingCount);
                    flushed.reset();
                }

                groupStart = groupEnd;
            }

            foreach (KeyValuePair<object, object> threadEntry in threadsAndFields)
            {
                FreqProxTermsWriterPerThread threadWriter = (FreqProxTermsWriterPerThread)threadEntry.Key;
                threadWriter.termsHashPerThread.reset(true);
            }

            freqOut.Close();
            if (proxOut != null)
            {
                // The prox file is recorded only when it was actually created.
                state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
                proxOut.Close();
            }
            termsOut.Close();

            // Record all files we have flushed
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)] = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
        }
        /* Walk through all unique text tokens (Posting
         * instances) found in this field and serialize them
         * to the term dictionary (termsOut), the freq stream
         * (freqOut) and, when present, the prox stream (proxOut).
         *
         * `fields` holds the per-thread writers of ONE field (the assert
         * below checks they share fields[0].fieldInfo).  Their postings
         * are merged term by term (smallest term first, per compareText)
         * and, within a term, doc by doc (smallest docID first).
         *
         * Uses the members `termInfo` and `termsUTF8`, which are declared
         * outside this method, as reusable scratch objects. */
        void AppendPostings(DocumentsWriter.FlushState flushState,
                            FreqProxTermsWriterPerField[] fields,
                            TermInfosWriter termsOut,
                            IndexOutput freqOut,
                            IndexOutput proxOut,
                            DefaultSkipListWriter skipListWriter)
        {
            int fieldNumber = fields[0].fieldInfo.number;
            int numFields = fields.Length;

            // One merge cursor per per-thread writer of this field.
            FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];

            for (int i = 0; i < numFields; i++)
            {
                FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);

                System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields[0].fieldInfo);

                // Should always be true
                // NOTE: nextTerm() must run even when asserts are compiled out,
                // which is why its result is stored in a local first.
                bool result = fms.nextTerm();
                System.Diagnostics.Debug.Assert(result);
            }

            int skipInterval = termsOut.skipInterval;
            bool currentFieldOmitTf = fields[0].fieldInfo.omitTf;

            // If current field omits tf then it cannot store
            // payloads.  We silently drop the payloads in this case:
            bool currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads;

            // Scratch array: the states that are positioned on the current (smallest) term.
            FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];

            // numFields shrinks as states run out of terms; the live states
            // are always mergeStates[0..numFields).
            while (numFields > 0)
            {

                // Get the next term to merge
                termStates[0] = mergeStates[0];
                int numToMerge = 1;

                for (int i = 1; i < numFields; i++)
                {
                    char[] text = mergeStates[i].text;
                    int textOffset = mergeStates[i].textOffset;
                    int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);

                    if (cmp < 0)
                    {
                        // Strictly smaller term: it becomes the only candidate so far.
                        termStates[0] = mergeStates[i];
                        numToMerge = 1;
                    }
                    else if (cmp == 0)
                        // Same term as the current minimum: merge it too.
                        termStates[numToMerge++] = mergeStates[i];
                }

                // Per-term state: document frequency, the last payload length
                // written to proxOut (-1 = none yet) and the last docID written.
                int df = 0;
                int lastPayloadLength = -1;

                int lastDoc = 0;

                // Capture the term text now; the states are advanced below.
                char[] text_Renamed = termStates[0].text;
                int start = termStates[0].textOffset;

                // Where this term's data begins in the freq / prox streams.
                long freqPointer = freqOut.GetFilePointer();
                long proxPointer;
                if (proxOut != null)
                    proxPointer = proxOut.GetFilePointer();
                else
                    proxPointer = 0;

                skipListWriter.ResetSkip();

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                while (numToMerge > 0)
                {

                    // Buffer a skip entry before every skipInterval-th document.
                    if ((++df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    // Pick the state with the smallest current docID.
                    FreqProxFieldMergeState minState = termStates[0];
                    for (int i = 1; i < numToMerge; i++)
                        if (termStates[i].docID < minState.docID)
                            minState = termStates[i];

                    int doc = minState.docID;
                    int termDocFreq = minState.termFreq;

                    System.Diagnostics.Debug.Assert(doc < flushState.numDocsInRAM);
                    System.Diagnostics.Debug.Assert(doc > lastDoc || df == 1);

                    ByteSliceReader prox = minState.prox;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.
                    if (!currentFieldOmitTf)
                    {
                        // omitTf == false so we do write positions & payload
                        System.Diagnostics.Debug.Assert(proxOut != null);
                        for (int j = 0; j < termDocFreq; j++)
                        {
                            // In the buffered stream the low bit of each code
                            // means "a payload length follows".
                            int code = prox.ReadVInt();
                            if (currentFieldStorePayloads)
                            {
                                int payloadLength;
                                if ((code & 1) != 0)
                                {
                                    // This position has a payload
                                    payloadLength = prox.ReadVInt();
                                }
                                else
                                    payloadLength = 0;
                                // In the output the low bit instead means "the
                                // payload length changed and is written next".
                                if (payloadLength != lastPayloadLength)
                                {
                                    proxOut.WriteVInt(code | 1);
                                    proxOut.WriteVInt(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                    proxOut.WriteVInt(code & (~1));
                                if (payloadLength > 0)
                                    copyBytes(prox, proxOut, payloadLength);
                            }
                            else
                            {
                                // No payloads stored: drop the flag bit entirely.
                                System.Diagnostics.Debug.Assert(0 == (code & 1));
                                proxOut.WriteVInt(code >> 1);
                            }
                        } //End for

                        // Doc delta is shifted left by one; the low bit set
                        // means "term frequency is 1", so no freq VInt follows.
                        int newDocCode = (doc - lastDoc) << 1;

                        if (1 == termDocFreq)
                        {
                            freqOut.WriteVInt(newDocCode | 1);
                        }
                        else
                        {
                            freqOut.WriteVInt(newDocCode);
                            freqOut.WriteVInt(termDocFreq);
                        }
                    }
                    else
                    {
                        // omitTf==true: we store only the docs, without
                        // term freq, positions, payloads
                        freqOut.WriteVInt(doc - lastDoc);
                    }

                    lastDoc = doc;

                    if (!minState.nextDoc())
                    {

                        // Remove from termStates
                        // (in-place compaction that keeps the remaining order)
                        int upto = 0;
                        for (int i = 0; i < numToMerge; i++)
                            if (termStates[i] != minState)
                                termStates[upto++] = termStates[i];
                        numToMerge--;
                        System.Diagnostics.Debug.Assert(upto == numToMerge);

                        // Advance this state to the next term

                        if (!minState.nextTerm())
                        {
                            // OK, no more terms, so remove from mergeStates
                            // as well
                            upto = 0;
                            for (int i = 0; i < numFields; i++)
                                if (mergeStates[i] != minState)
                                    mergeStates[upto++] = mergeStates[i];
                            numFields--;
                            System.Diagnostics.Debug.Assert(upto == numFields);
                        }
                    }
                }

                System.Diagnostics.Debug.Assert(df > 0);

                // Done merging this term

                long skipPointer = skipListWriter.WriteSkip(freqOut);

                // Write term
                // The last argument is the skip data's offset relative to the
                // start of this term's freq data.
                termInfo.Set(df, freqPointer, proxPointer, (int)(skipPointer - freqPointer));

                // TODO: we could do this incrementally
                UnicodeUtil.UTF16toUTF8(text_Renamed, start, termsUTF8);

                // TODO: we could save O(n) re-scan of the term by
                // computing the shared prefix with the last term
                // while during the UTF8 encoding
                termsOut.Add(fieldNumber,
                             termsUTF8.result,
                             termsUTF8.length,
                             termInfo);
            }
        }
Esempio n. 11
0
        // TODO: would be nice to factor out more of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share alot of this...

        /// <summary>
        /// Writes the postings buffered by every thread through a
        /// <c>FormatPostingsFieldsWriter</c>, then resets the per-field and
        /// per-thread buffers.
        /// </summary>
        /// <param name="threadsAndFields">
        /// Maps each per-thread consumer to the per-field consumers it holds.
        /// Keys are cast to <c>FreqProxTermsWriterPerThread</c> and values'
        /// elements to <c>FreqProxTermsWriterPerField</c>.
        /// </param>
        /// <param name="state">Segment write state handed to the postings writer.</param>
        public override void Flush(Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state)
        {
            // Collect, across all threads, only the fields that buffered at
            // least one posting.
            List<FreqProxTermsWriterPerField> pending = new List<FreqProxTermsWriterPerField>();

            foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadEntry in threadsAndFields)
            {
                foreach (TermsHashConsumerPerField consumerField in threadEntry.Value)
                {
                    FreqProxTermsWriterPerField candidate = (FreqProxTermsWriterPerField)consumerField;
                    if (candidate.termsHashPerField.numPostings <= 0)
                    {
                        continue;
                    }

                    pending.Add(candidate);
                }
            }

            // Sort() uses the element type's own ordering (expected to be by
            // field name), so entries for the same field from different
            // threads end up adjacent.
            pending.Sort();
            int total = pending.Count;

            // TODO: allow Lucene user to customize this consumer:
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

            /*
             * Current writer chain:
             * FormatPostingsFieldsConsumer
             * -> IMPL: FormatPostingsFieldsWriter
             * -> FormatPostingsTermsConsumer
             * -> IMPL: FormatPostingsTermsWriter
             * -> FormatPostingsDocConsumer
             * -> IMPL: FormatPostingsDocWriter
             * -> FormatPostingsPositionsConsumer
             * -> IMPL: FormatPostingsPositionsWriter
             */

            int groupStart = 0;
            while (groupStart < total)
            {
                FieldInfo sharedInfo = pending[groupStart].fieldInfo;
                string groupName = sharedInfo.name;

                // Locate the exclusive end of the run of entries that carry
                // the same field name.
                int groupEnd = groupStart + 1;
                while (groupEnd < total)
                {
                    if (!pending[groupEnd].fieldInfo.name.Equals(groupName))
                    {
                        break;
                    }

                    groupEnd++;
                }

                // Slice the run [groupStart, groupEnd) into its own array.
                FreqProxTermsWriterPerField[] sameField = new FreqProxTermsWriterPerField[groupEnd - groupStart];
                pending.CopyTo(groupStart, sameField, 0, sameField.Length);

                // Aggregate the storePayload as seen by the same
                // field across multiple threads
                foreach (FreqProxTermsWriterPerField member in sameField)
                {
                    sharedInfo.storePayloads |= member.hasPayloads;
                }

                // Every entry here has numPostings > 0, so add the group's
                // postings to the segment.
                AppendPostings(sameField, consumer);

                foreach (FreqProxTermsWriterPerField flushed in sameField)
                {
                    TermsHashPerField hash = flushed.termsHashPerField;

                    // Read the count before Reset() so ShrinkHash receives the
                    // pre-reset value.
                    int postingCount = hash.numPostings;
                    hash.Reset();
                    hash.ShrinkHash(postingCount);
                    flushed.Reset();
                }

                groupStart = groupEnd;
            }

            // Finally reset each thread's terms hash.
            foreach (KeyValuePair<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadEntry in threadsAndFields)
            {
                ((FreqProxTermsWriterPerThread)threadEntry.Key).termsHashPerThread.Reset(true);
            }

            consumer.Finish();
        }
        // TODO: would be nice to factor out more of this, e.g. the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        /// <summary>
        /// Writes the postings buffered by every thread to the segment's term
        /// dictionary, frequency file and (when <c>fieldInfos.HasProx()</c>)
        /// proximity file, resets the per-field and per-thread buffers, and
        /// records the written file names in <c>state.flushedFiles</c>.
        /// </summary>
        /// <param name="threadsAndFields">
        /// Each key is cast to <c>FreqProxTermsWriterPerThread</c>; each value
        /// is cast to <c>ICollection&lt;object&gt;</c> whose elements are cast to
        /// <c>FreqProxTermsWriterPerField</c>.
        /// </param>
        /// <param name="state">Flush state supplying the directory, segment name and flushed-file registry.</param>
        internal override void flush(IDictionary <object, object> threadsAndFields, DocumentsWriter.FlushState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <object> allFields = new List <object>();

            // foreach guarantees the underlying enumerators are disposed,
            // even if a cast or the loop body throws.
            foreach (KeyValuePair <object, object> entry in threadsAndFields)
            {
                ICollection <object> fields = (ICollection <object>)entry.Value;

                foreach (object field in fields)
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)field;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name (relies on the elements' own comparison, as
            // the list is typed as object).
            allFields.Sort();
            int numAllFields = allFields.Count;

            // NOTE(review): none of the outputs below are closed if an
            // exception is thrown before the Close() calls at the end of this
            // method; presumably the caller's abort path cleans up - confirm.
            TermInfosWriter termsOut = new TermInfosWriter(state.directory,
                                                           state.segmentName,
                                                           fieldInfos,
                                                           state.docWriter.writer.GetTermIndexInterval());

            IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));
            IndexOutput proxOut;

            // The proximity output only exists when some field has prox data.
            if (fieldInfos.HasProx())
            {
                proxOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION));
            }
            else
            {
                proxOut = null;
            }

            DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
                                                                             termsOut.maxSkipLevels,
                                                                             state.numDocsInRAM, freqOut, proxOut);

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                string    fieldName = fieldInfo.name;

                // Advance end past every adjacent entry with the same field
                // name; [start, end) is then one field seen by several threads.
                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;

                    // Read the count before reset() so shrinkHash receives the
                    // pre-reset value.
                    int numPostings            = perField.numPostings;
                    perField.reset();
                    perField.shrinkHash(numPostings);
                    fields[i].reset();
                }

                start = end;
            }

            // Reset each thread's terms hash now that all fields are written.
            foreach (KeyValuePair <object, object> entry in threadsAndFields)
            {
                FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.reset(true);
            }

            freqOut.Close();
            if (proxOut != null)
            {
                // The prox file is only recorded when it was actually created.
                state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
                proxOut.Close();
            }
            termsOut.Close();

            // Record all files we have flushed (each name is stored as both
            // key and value).
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)]        = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)]       = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
        }