Example #1
 /// <summary> Sets the values for the current skip data. </summary>
 internal virtual void  SetSkipData(int doc, bool storePayloads, int payloadLength)
 {
     this.curDoc           = doc;
     this.curStorePayloads = storePayloads;
     this.curPayloadLength = payloadLength;
     this.curFreqPointer   = freqOutput.GetFilePointer();
     this.curProxPointer   = proxOutput.GetFilePointer();
 }
 /// <summary>
 /// Sets the values for the current skip data.
 /// </summary>
 public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength)
 {
     this.CurDoc           = doc;
     this.CurStorePayloads = storePayloads;
     this.CurPayloadLength = payloadLength;
     this.CurFreqPointer   = FreqOutput.GetFilePointer();
     if (ProxOutput != null)
     {
         this.CurProxPointer = ProxOutput.GetFilePointer();
     }
 }
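For context, both variants above are driven once per skip interval while postings are appended (the AppendPostings methods further down this page show the real call site). A minimal, hedged sketch of that calling pattern: skipListWriter, skipInterval, df, lastDoc, currentFieldStorePayloads and lastPayloadLength are assumed to be in scope in the enclosing merge loop.

 // Hedged sketch of the call site (mirrors AppendPostings below; not standalone).
 if ((++df % skipInterval) == 0)
 {
     // Capture doc/payload state plus the current freq/prox file pointers...
     skipListWriter.SetSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
     // ...then buffer a skip entry for the df-th document of this term.
     skipListWriter.BufferSkip(df);
 }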
Example #3
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput is_Renamed = null;

            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenInput(source.file);
                long length    = is_Renamed.Length();
                long remainder = length;
                int  chunk     = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int)System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                {
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
                }

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff   = endPtr - startPtr;
                if (diff != length)
                {
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
                }
            }
            finally
            {
                if (is_Renamed != null)
                {
                    is_Renamed.Close();
                }
            }
        }
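The loop above is the standard chunked-copy pattern. A hedged, standalone rendering of the same idea over plain .NET streams (the names here are illustrative, not Lucene API):

        // Standalone sketch: copy 'input' to 'output' through a reusable buffer,
        // tracking the remaining byte count the same way CopyFile does.
        static void CopyInChunks(System.IO.Stream input, System.IO.Stream output, byte[] buffer)
        {
            long remainder = input.Length;
            while (remainder > 0)
            {
                int len = (int)System.Math.Min(buffer.Length, remainder);
                int read = input.Read(buffer, 0, len);   // Stream.Read may return fewer bytes than requested
                if (read <= 0)
                    throw new System.IO.IOException("Unexpected end of stream, remainder: " + remainder);
                output.Write(buffer, 0, read);
                remainder -= read;
            }
        }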
Example #4
        private TermInfo termInfo = new TermInfo();         // minimize consing

        /// <summary>Merge one term found in one or more segments. The array <code>smis</code>
        /// contains segments that are positioned at the same term. <code>n</code>
        /// is the number of cells in the array actually occupied.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        private void  MergeTermInfo(SegmentMergeInfo[] smis, int n)
        {
            long freqPointer = freqOutput.GetFilePointer();
            long proxPointer = proxOutput.GetFilePointer();

            int df = AppendPostings(smis, n);             // append posting data

            long skipPointer = WriteSkip();

            if (df > 0)
            {
                // add an entry to the dictionary with pointers to prox and freq files
                termInfo.Set(df, freqPointer, proxPointer, (int)(skipPointer - freqPointer));
                termInfosWriter.Add(smis[0].term, termInfo);
            }
        }
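Note that the skip pointer is stored relative to the term's freq pointer, which is why it fits in the int slot of TermInfo. A hedged arithmetic illustration with made-up pointer values:

        // Hedged illustration of the relative skip offset stored in TermInfo.
        long freqPointer = 1000;                            // where this term's postings start in .frq
        long skipPointer = 1523;                            // where its skip list starts (WriteSkip's return value)
        int skipOffset = (int)(skipPointer - freqPointer);  // 523: relative, so an int suffices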
Example #5
        /// <summary>Fills in no-term-vectors for all docs we haven't seen
        /// since the last doc that had term vectors.
        /// </summary>
        internal void  Fill(int docID)
        {
            int docStoreOffset = docWriter.GetDocStoreOffset();
            int end            = docID + docStoreOffset;

            if (lastDocID < end)
            {
                long tvfPosition = tvf.GetFilePointer();
                while (lastDocID < end)
                {
                    tvx.WriteLong(tvd.GetFilePointer());
                    tvd.WriteVInt(0);
                    tvx.WriteLong(tvfPosition);
                    lastDocID++;
                }
            }
        }
Example #6
        public virtual void  TestLargeWrites()
        {
            IndexOutput os = dir.CreateOutput("testBufferStart.txt");

            byte[] largeBuf = new byte[2048];
            System.Random random = new System.Random(); // create once: a fresh time-seeded Random per byte repeats values
            for (int i = 0; i < largeBuf.Length; i++)
            {
                largeBuf[i] = (byte)(random.NextDouble() * 256);
            }

            long currentPos = os.GetFilePointer();

            os.WriteBytes(largeBuf, largeBuf.Length);

            try
            {
                Assert.AreEqual(currentPos + largeBuf.Length, os.GetFilePointer());
            }
            finally
            {
                os.Close();
            }
        }
        /// <summary> Writes the buffered skip lists to the given output.
        ///
        /// </summary>
        /// <param name="output">the IndexOutput the skip lists shall be written to
        /// </param>
        /// <returns> the pointer the skip list starts
        /// </returns>
        internal virtual long WriteSkip(IndexOutput output)
        {
            long skipPointer = output.GetFilePointer();

            if (skipBuffer == null || skipBuffer.Length == 0)
            {
                return skipPointer;
            }

            for (int level = numberOfSkipLevels - 1; level > 0; level--)
            {
                long length = skipBuffer[level].GetFilePointer();
                if (length > 0)
                {
                    output.WriteVLong(length);
                    skipBuffer[level].WriteTo(output);
                }
            }
            skipBuffer[0].WriteTo(output);

            return skipPointer;
        }
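The writer emits, for every level from the top down to level 1, a VLong length prefix followed by that level's bytes, and finally level 0 with no prefix. A hedged sketch of a matching reader; it assumes an IndexInput positioned at skipPointer, the same numberOfSkipLevels as the writer, and that every level above 0 is non-empty (the writer silently skips empty levels):

        // Hedged reader sketch for the on-disk layout produced by WriteSkip above.
        long[] levelStart = new long[numberOfSkipLevels];
        for (int level = numberOfSkipLevels - 1; level > 0; level--)
        {
            long length = input.ReadVLong();            // length prefix written by WriteSkip
            levelStart[level] = input.GetFilePointer(); // this level's data starts here
            input.Seek(levelStart[level] + length);     // jump over it to the next prefix
        }
        levelStart[0] = input.GetFilePointer();         // level 0 is last and unprefixed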
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            foreach (Field field  in doc.Fields())
            {
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            foreach (Field field in doc.Fields())
            {
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data = null;
                        // check if it is a binary field
                        if (field.IsBinary())
                        {
                            data = Compress(field.BinaryValue());
                        }
                        else
                        {
                            data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int    len  = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
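A hedged usage sketch of the record this produces. AddDocument is internal to Lucene, and the Field constructor below is the old Lucene.NET API of this era, so treat it purely as an illustration:

        // Hypothetical caller: only stored fields end up in the fields stream.
        Document doc = new Document();
        doc.Add(new Field("title", "compound files", Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("body", "not stored, so AddDocument skips it", Field.Store.NO, Field.Index.TOKENIZED));
        fieldsWriter.AddDocument(doc); // writes: index pointer, stored count (1), then field number, bits, value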
Example #9
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Collections.IDictionary byField = new System.Collections.Hashtable();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields         = (System.Collections.ICollection)entry.Value;
                System.Collections.IEnumerator fieldsIt       = fields.GetEnumerator();
                System.Collections.ArrayList   fieldsToRemove = new System.Collections.ArrayList();

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo];
                        if (l == null)
                        {
                            l = new System.Collections.ArrayList();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    // Remove this field since we haven't seen it
                    // since the previous flush
                    else
                    {
                        fieldsToRemove.Add(perField);
                    }
                }

                System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fieldsHT.Remove(fieldsToRemove[i]);
                }
            }

            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];
                    int upto = 0;
                    if (toMerge != null)
                    {
                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].Reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
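The closing assert pins down the .nrm layout: a 4-byte header followed by exactly one norm byte per document for each field that has norms. A hedged back-of-the-envelope check of that invariant, with made-up counts:

            // Hedged illustration of the size invariant asserted above.
            int headerBytes = 4;     // SegmentMerger.NORMS_HEADER (assumed 4 bytes, per the assert)
            int normCount = 3;       // e.g. three indexed fields with norms enabled
            int numDocs = 1000;
            long expectedBytes = headerBytes + (long)normCount * numDocs; // 3004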
Example #11
        private void  WritePostings(Posting[] postings, System.String segment)
        {
            IndexOutput       freq = null, prox = null;
            TermInfosWriter   tis              = null;
            TermVectorsWriter termVectorWriter = null;

            try
            {
                //open files for inverse index storage
                freq = directory.CreateOutput(segment + ".frq");
                prox = directory.CreateOutput(segment + ".prx");
                tis  = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                TermInfo      ti           = new TermInfo();
                System.String currentField = null;

                for (int i = 0; i < postings.Length; i++)
                {
                    Posting posting = postings[i];

                    // add an entry to the dictionary with pointers to prox and freq files
                    ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
                    tis.Add(posting.term, ti);

                    // add an entry to the freq file
                    int postingFreq = posting.freq;
                    if (postingFreq == 1)
                    {
                        // optimize freq=1: also sets the low bit of the doc code
                        freq.WriteVInt(1);
                    }
                    else
                    {
                        freq.WriteVInt(0);                         // the document number
                        freq.WriteVInt(postingFreq);               // frequency in doc
                    }

                    int   lastPosition = 0;                   // write positions
                    int[] positions    = posting.positions;
                    for (int j = 0; j < postingFreq; j++)
                    {
                        // use delta-encoding
                        int position = positions[j];
                        prox.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                    // check to see if we switched to a new field
                    System.String termField = posting.term.Field();
                    if (currentField != termField)
                    {
                        // changing field - see if there is something to save
                        currentField = termField;
                        FieldInfo fi = fieldInfos.FieldInfo(currentField);
                        if (fi.storeTermVector)
                        {
                            if (termVectorWriter == null)
                            {
                                termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                                termVectorWriter.OpenDocument();
                            }
                            termVectorWriter.OpenField(currentField);
                        }
                        else if (termVectorWriter != null)
                        {
                            termVectorWriter.CloseField();
                        }
                    }
                    if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
                    {
                        termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
                    }
                }
                if (termVectorWriter != null)
                {
                    termVectorWriter.CloseDocument();
                }
            }
            finally
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                System.IO.IOException keep = null;
                if (freq != null)
                {
                    try
                    {
                        freq.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (prox != null)
                {
                    try
                    {
                        prox.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (tis != null)
                {
                    try
                    {
                        tis.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (termVectorWriter != null)
                {
                    try
                    {
                        termVectorWriter.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (keep != null)
                {
                    // Re-throw the first close failure that was recorded above.
                    throw keep;
                }
            }
        }
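Positions are delta-encoded against the previous position in the same document, so typical gaps stay small and VInt-friendly. A hedged, standalone illustration of the encoding loop above:

            // Standalone sketch: positions 3, 7, 12 are written as the deltas 3, 4, 5.
            int[] positions = { 3, 7, 12 };
            int lastPosition = 0;
            foreach (int position in positions)
            {
                int delta = position - lastPosition; // this is what prox.WriteVInt receives
                System.Console.WriteLine(delta);
                lastPosition = position;
            }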
Example #12
        /// <summary>Merge files with the extensions added up to now.
        /// All files with these extensions are combined sequentially into the
        /// compound stream. After successful merge, the source files
        /// are deleted.
        /// </summary>
        /// <throws>  IllegalStateException if close() had been called before or
        /// if no file has been added to this object </throws>
        public void  Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }

            if (entries.Count == 0)
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            IndexOutput os = null;

            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                System.Collections.IEnumerator it = entries.GetEnumerator();
                long totalSize = 0;
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0);                     // for now
                    os.WriteString(fe.file);
                    totalSize += directory.FileLength(fe.file);
                }

                // Pre-allocate size of file as optimization --
                // this can potentially help IO performance as
                // we write the file and also later during
                // searching.  It also uncovers a disk-full
                // situation earlier and hopefully without
                // actually filling disk to 100%:
                long finalLength = totalSize + os.GetFilePointer();
                os.SetLength(finalLength);

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                byte[] buffer = new byte[16384];
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                System.Diagnostics.Debug.Assert(finalLength == os.Length());

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                        // Best-effort close; swallow so any original exception propagates.
                    }
                }
            }
        }
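A hedged usage sketch of the whole lifecycle; AddFile is assumed from the CompoundFileWriter API of this era, and directory/segment are assumed to be in scope:

            // Hypothetical caller: register source files, then Close() merges them
            // and patches the directory offsets in a second pass.
            CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + ".cfs");
            cfsWriter.AddFile(segment + ".frq");
            cfsWriter.AddFile(segment + ".prx");
            cfsWriter.Close();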
        /// <summary>Merge files with the extensions added up to now.
        /// All files with these extensions are combined sequentially into the
        /// compound stream. After successful merge, the source files
        /// are deleted.
        /// </summary>
        /// <throws>  IllegalStateException if close() had been called before or
        /// if no file has been added to this object </throws>
        public void  Close()
        {
            if (merged)
            {
                throw new System.SystemException("Merge already performed");
            }

            if (entries.Count == 0)
            {
                throw new System.SystemException("No entries to merge have been defined");
            }

            merged = true;

            // open the compound stream
            IndexOutput os = null;

            try
            {
                os = directory.CreateOutput(fileName);

                // Write the number of entries
                os.WriteVInt(entries.Count);

                // Write the directory with all offsets at 0.
                // Remember the positions of directory entries so that we can
                // adjust the offsets later
                System.Collections.IEnumerator it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.directoryOffset = os.GetFilePointer();
                    os.WriteLong(0);                     // for now
                    os.WriteString(fe.file);
                }

                // Open the files and copy their data into the stream.
                // Remember the locations of each file's data section.
                byte[] buffer = new byte[1024];
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    fe.dataOffset = os.GetFilePointer();
                    CopyFile(fe, os, buffer);
                }

                // Write the data offsets into the directory of the compound stream
                it = entries.GetEnumerator();
                while (it.MoveNext())
                {
                    FileEntry fe = (FileEntry)it.Current;
                    os.Seek(fe.directoryOffset);
                    os.WriteLong(fe.dataOffset);
                }

                // Close the output stream. Set the os to null before trying to
                // close so that if an exception occurs during the close, the
                // finally clause below will not attempt to close the stream
                // the second time.
                IndexOutput tmp = os;
                os = null;
                tmp.Close();
            }
            finally
            {
                if (os != null)
                {
                    try
                    {
                        os.Close();
                    }
                    catch (System.IO.IOException)
                    {
                        // Best-effort close; swallow so any original exception propagates.
                    }
                }
            }
        }
Example #14
        public virtual void TestEncodeDecode()
        {
            int   iterations = RandomInts.RandomInt32Between(Random, 1, 1000);
            float AcceptableOverheadRatio = (float)Random.NextDouble();

            int[] values = new int[(iterations - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
            for (int i = 0; i < iterations; ++i)
            {
                int bpv = Random.Next(32);
                if (bpv == 0)
                {
                    int value = RandomInts.RandomInt32Between(Random, 0, int.MaxValue);
                    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
                    {
                        values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = value;
                    }
                }
                else
                {
                    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
                    {
                        values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = RandomInts.RandomInt32Between(Random, 0, (int)PackedInt32s.MaxValue(bpv));
                    }
                }
            }

            Directory d = new RAMDirectory();
            long      endPointer;

            {
                // encode
                IndexOutput @out    = d.CreateOutput("test.bin", IOContext.DEFAULT);
                ForUtil     forUtil = new ForUtil(AcceptableOverheadRatio, @out);

                for (int i = 0; i < iterations; ++i)
                {
                    forUtil.WriteBlock(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, values.Length), new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
                }
                endPointer = @out.GetFilePointer();
                @out.Dispose();
            }

            {
                // decode
                IndexInput @in     = d.OpenInput("test.bin", IOContext.READ_ONCE);
                ForUtil    forUtil = new ForUtil(@in);
                for (int i = 0; i < iterations; ++i)
                {
                    if (Random.NextBoolean())
                    {
                        forUtil.SkipBlock(@in);
                        continue;
                    }
                    int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
                    forUtil.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
                    Assert.AreEqual(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, (i + 1) * Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
                }
                assertEquals(endPointer, @in.GetFilePointer());
                @in.Dispose();
            }
        }
		/* Walk through all unique text tokens (Posting
		* instances) found in this field and serialize them
		* into a single RAM segment. */
		internal void  AppendPostings(ThreadState.FieldData[] fields, TermInfosWriter termsOut, IndexOutput freqOut, IndexOutput proxOut)
		{
			
			int fieldNumber = fields[0].fieldInfo.number;
			int numFields = fields.Length;
			
			FieldMergeState[] mergeStates = new FieldMergeState[numFields];
			
			for (int i = 0; i < numFields; i++)
			{
				FieldMergeState fms = mergeStates[i] = new FieldMergeState();
				fms.field = fields[i];
				fms.postings = fms.field.SortPostings();
				
				System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields[0].fieldInfo);
				
				// Should always be true
				bool result = fms.NextTerm();
				System.Diagnostics.Debug.Assert(result);
			}
			
			int skipInterval = termsOut.skipInterval;
			currentFieldStorePayloads = fields[0].fieldInfo.storePayloads;
			
			FieldMergeState[] termStates = new FieldMergeState[numFields];
			
			while (numFields > 0)
			{
				
				// Get the next term to merge
				termStates[0] = mergeStates[0];
				int numToMerge = 1;
				
				for (int i = 1; i < numFields; i++)
				{
					char[] text = mergeStates[i].text;
					int textOffset = mergeStates[i].textOffset;
					int cmp = CompareText(text, textOffset, termStates[0].text, termStates[0].textOffset);
					
					if (cmp < 0)
					{
						termStates[0] = mergeStates[i];
						numToMerge = 1;
					}
					else if (cmp == 0)
						termStates[numToMerge++] = mergeStates[i];
				}
				
				int df = 0;
				int lastPayloadLength = -1;
				
				int lastDoc = 0;
				
				char[] text2 = termStates[0].text;
				int start = termStates[0].textOffset;
				int pos = start;
				while (text2[pos] != 0xffff)
					pos++;
				
				long freqPointer = freqOut.GetFilePointer();
				long proxPointer = proxOut.GetFilePointer();
				
				skipListWriter.ResetSkip();
				
				// Now termStates has numToMerge FieldMergeStates
				// which all share the same term.  Now we must
				// interleave the docID streams.
				while (numToMerge > 0)
				{
					
					if ((++df % skipInterval) == 0)
					{
						skipListWriter.SetSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
						skipListWriter.BufferSkip(df);
					}
					
					FieldMergeState minState = termStates[0];
					for (int i = 1; i < numToMerge; i++)
						if (termStates[i].docID < minState.docID)
							minState = termStates[i];
					
					int doc = minState.docID;
					int termDocFreq = minState.termFreq;
					
					System.Diagnostics.Debug.Assert(doc < numDocsInRAM);
					System.Diagnostics.Debug.Assert(doc > lastDoc || df == 1);
					
					int newDocCode = (doc - lastDoc) << 1;
					lastDoc = doc;
					
					ByteSliceReader prox = minState.prox;
					
					// Carefully copy over the prox + payload info,
					// changing the format to match Lucene's segment
					// format.
					for (int j = 0; j < termDocFreq; j++)
					{
						int code = prox.ReadVInt();
						if (currentFieldStorePayloads)
						{
							int payloadLength;
							if ((code & 1) != 0)
							{
								// This position has a payload
								payloadLength = prox.ReadVInt();
							}
							else
								payloadLength = 0;
							if (payloadLength != lastPayloadLength)
							{
								proxOut.WriteVInt(code | 1);
								proxOut.WriteVInt(payloadLength);
								lastPayloadLength = payloadLength;
							}
							else
								proxOut.WriteVInt(code & (~1));
							if (payloadLength > 0)
								CopyBytes(prox, proxOut, payloadLength);
						}
						else
						{
							System.Diagnostics.Debug.Assert(0 == (code & 1));
							proxOut.WriteVInt(code >> 1);
						}
					}
					
					if (1 == termDocFreq)
					{
						freqOut.WriteVInt(newDocCode | 1);
					}
					else
					{
						freqOut.WriteVInt(newDocCode);
						freqOut.WriteVInt(termDocFreq);
					}
					
					if (!minState.NextDoc())
					{
						
						// Remove from termStates
						int upto = 0;
						for (int i = 0; i < numToMerge; i++)
							if (termStates[i] != minState)
								termStates[upto++] = termStates[i];
						numToMerge--;
						System.Diagnostics.Debug.Assert(upto == numToMerge);
						
						// Advance this state to the next term
						
						if (!minState.NextTerm())
						{
							// OK, no more terms, so remove from mergeStates
							// as well
							upto = 0;
							for (int i = 0; i < numFields; i++)
								if (mergeStates[i] != minState)
									mergeStates[upto++] = mergeStates[i];
							numFields--;
							System.Diagnostics.Debug.Assert(upto == numFields);
						}
					}
				}
				
				System.Diagnostics.Debug.Assert(df > 0);
				
				// Done merging this term
				
				long skipPointer = skipListWriter.WriteSkip(freqOut);
				
				// Write term
				termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
				termsOut.Add(fieldNumber, text2, start, pos - start, termInfo);
			}
		}
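The freq stream packs the doc delta and a "freq is 1" flag into a single VInt: newDocCode = (doc - lastDoc) << 1, with the low bit set when termDocFreq == 1. A hedged, standalone decode of that packing:

        // Standalone sketch of the doc code used by the freq stream above.
        int lastDoc = 40, doc = 42, termDocFreq = 1;
        int newDocCode = ((doc - lastDoc) << 1) | (termDocFreq == 1 ? 1 : 0); // 5
        // Decoding mirrors it:
        int docDelta = newDocCode >> 1;         // 2, so doc = lastDoc + 2 = 42
        bool freqIsOne = (newDocCode & 1) != 0; // true: no explicit freq VInt follows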
Example #16
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
                // and field.binaryValue() already returns the compressed value for a field
                // with isCompressed()==true, so we disable compression in that case
                bool disableCompression = (field is FieldsReader.FieldForMerge);
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data = null;

                        if (disableCompression)
                        {
                            // optimized case for merging, the data
                            // is already compressed
                            data = field.BinaryValue();
                        }
                        else
                        {
                            // check if it is a binary field
                            if (field.IsBinary())
                            {
                                data = Compress(field.BinaryValue());
                            }
                            else
                            {
                                data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                            }
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int    len  = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
        /* Walk through all unique text tokens (Posting
         * instances) found in this field and serialize them
         * into a single RAM segment. */
        void AppendPostings(DocumentsWriter.FlushState flushState,
                            FreqProxTermsWriterPerField[] fields,
                            TermInfosWriter termsOut,
                            IndexOutput freqOut,
                            IndexOutput proxOut,
                            DefaultSkipListWriter skipListWriter)
        {
            int fieldNumber = fields[0].fieldInfo.number;
            int numFields = fields.Length;

            FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];

            for (int i = 0; i < numFields; i++)
            {
                FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);

                System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields[0].fieldInfo);

                // Should always be true
                bool result = fms.nextTerm();
                System.Diagnostics.Debug.Assert(result);
            }

            int skipInterval = termsOut.skipInterval;
            bool currentFieldOmitTf = fields[0].fieldInfo.omitTf;

            // If current field omits tf then it cannot store
            // payloads.  We silently drop the payloads in this case:
            bool currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads;

            FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];

            while (numFields > 0)
            {

                // Get the next term to merge
                termStates[0] = mergeStates[0];
                int numToMerge = 1;

                for (int i = 1; i < numFields; i++)
                {
                    char[] text = mergeStates[i].text;
                    int textOffset = mergeStates[i].textOffset;
                    int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);

                    if (cmp < 0)
                    {
                        termStates[0] = mergeStates[i];
                        numToMerge = 1;
                    }
                    else if (cmp == 0)
                        termStates[numToMerge++] = mergeStates[i];
                }

                int df = 0;
                int lastPayloadLength = -1;

                int lastDoc = 0;

                char[] text_Renamed = termStates[0].text;
                int start = termStates[0].textOffset;

                long freqPointer = freqOut.GetFilePointer();
                long proxPointer;
                if (proxOut != null)
                    proxPointer = proxOut.GetFilePointer();
                else
                    proxPointer = 0;

                skipListWriter.ResetSkip();

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                while (numToMerge > 0)
                {

                    if ((++df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    FreqProxFieldMergeState minState = termStates[0];
                    for (int i = 1; i < numToMerge; i++)
                        if (termStates[i].docID < minState.docID)
                            minState = termStates[i];

                    int doc = minState.docID;
                    int termDocFreq = minState.termFreq;

                    System.Diagnostics.Debug.Assert(doc < flushState.numDocsInRAM);
                    System.Diagnostics.Debug.Assert(doc > lastDoc || df == 1);

                    ByteSliceReader prox = minState.prox;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.
                    if (!currentFieldOmitTf)
                    {
                        // omitTf == false so we do write positions & payload
                        System.Diagnostics.Debug.Assert(proxOut != null);
                        for (int j = 0; j < termDocFreq; j++)
                        {
                            int code = prox.ReadVInt();
                            if (currentFieldStorePayloads)
                            {
                                int payloadLength;
                                if ((code & 1) != 0)
                                {
                                    // This position has a payload
                                    payloadLength = prox.ReadVInt();
                                }
                                else
                                    payloadLength = 0;
                                if (payloadLength != lastPayloadLength)
                                {
                                    proxOut.WriteVInt(code | 1);
                                    proxOut.WriteVInt(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                    proxOut.WriteVInt(code & (~1));
                                if (payloadLength > 0)
                                    copyBytes(prox, proxOut, payloadLength);
                            }
                            else
                            {
                                System.Diagnostics.Debug.Assert(0 == (code & 1));
                                proxOut.WriteVInt(code >> 1);
                            }
                        } //End for

                        int newDocCode = (doc - lastDoc) << 1;

                        if (1 == termDocFreq)
                        {
                            freqOut.WriteVInt(newDocCode | 1);
                        }
                        else
                        {
                            freqOut.WriteVInt(newDocCode);
                            freqOut.WriteVInt(termDocFreq);
                        }
                    }
                    else
                    {
                        // omitTf==true: we store only the docs, without
                        // term freq, positions, payloads
                        freqOut.WriteVInt(doc - lastDoc);
                    }

                    lastDoc = doc;

                    if (!minState.nextDoc())
                    {

                        // Remove from termStates
                        int upto = 0;
                        for (int i = 0; i < numToMerge; i++)
                            if (termStates[i] != minState)
                                termStates[upto++] = termStates[i];
                        numToMerge--;
                        System.Diagnostics.Debug.Assert(upto == numToMerge);

                        // Advance this state to the next term

                        if (!minState.nextTerm())
                        {
                            // OK, no more terms, so remove from mergeStates
                            // as well
                            upto = 0;
                            for (int i = 0; i < numFields; i++)
                                if (mergeStates[i] != minState)
                                    mergeStates[upto++] = mergeStates[i];
                            numFields--;
                            System.Diagnostics.Debug.Assert(upto == numFields);
                        }
                    }
                }

                System.Diagnostics.Debug.Assert(df > 0);

                // Done merging this term

                long skipPointer = skipListWriter.WriteSkip(freqOut);

                // Write term
                termInfo.Set(df, freqPointer, proxPointer, (int)(skipPointer - freqPointer));

                // TODO: we could do this incrementally
                UnicodeUtil.UTF16toUTF8(text_Renamed, start, termsUTF8);

                // TODO: we could save O(n) re-scan of the term by
                // computing the shared prefix with the last term
                // while during the UTF8 encoding
                termsOut.Add(fieldNumber,
                             termsUTF8.result,
                             termsUTF8.length,
                             termInfo);
            }
        }
Example #18
 // Writes the contents of buffer into the fields stream
 // and adds a new entry for this document into the index
 // stream.  This assumes the buffer was already written
 // in the correct fields format.
 internal void  FlushDocument(int numStoredFields, RAMOutputStream buffer)
 {
     indexStream.WriteLong(fieldsStream.GetFilePointer());
     fieldsStream.WriteVInt(numStoredFields);
     buffer.WriteTo(fieldsStream);
 }
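A hedged sketch of the two-step write FlushDocument expects: the per-field bytes are staged in a RAMOutputStream in fields-file format, then spliced in after the stored-field count. fieldsWriter and the staging logic are assumed here:

     // Hypothetical caller, mirroring what AddDocument does inline elsewhere on this page.
     RAMOutputStream buffer = new RAMOutputStream();
     int numStoredFields = 0;
     // ... for each stored field: write field number, bits and value into 'buffer',
     //     then increment numStoredFields ...
     fieldsWriter.FlushDocument(numStoredFields, buffer);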
        /** Produce _X.nrm if any document had a field with norms
         *  not disabled */
        internal override void flush(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary<object, object> byField = new Dictionary<object, object>();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            IEnumerator<KeyValuePair<object, ICollection<object>>> it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair<object, ICollection<object>> entry = it.Current;

                ICollection<object> fields         = entry.Value;
                IEnumerator<object> fieldsIt       = fields.GetEnumerator();
                List<object>        fieldsToRemove = new List<object>(fields.Count);

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        IList<object> l;
                        if (byField.ContainsKey(perField.fieldInfo))
                        {
                            l = (IList<object>)byField[perField.fieldInfo];
                        }
                        else
                        {
                            l = new List<object>();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    else
                    {
                        // Remove this field since we haven't seen it
                        // since the previous flush
                        fieldsToRemove.Add(perField);
                    }
                }
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fields.Remove(fieldsToRemove[i]);
                }
            }

            string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;

            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    List<object> toMerge;
                    int upto = 0;
                    if (byField.ContainsKey(fieldInfo))
                    {
                        toMerge = (List<object>)byField[fieldInfo];

                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocsInRAM);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocsInRAM == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocsInRAM) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
Example #20
        /// <summary>Copy the contents of the file with specified extension into the
        /// provided output stream. Use the provided buffer for moving data
        /// to reduce memory allocation.
        /// </summary>
        private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
        {
            IndexInput is_Renamed = null;
            try
            {
                long startPtr = os.GetFilePointer();

                is_Renamed = directory.OpenInput(source.file);
                long length = is_Renamed.Length();
                long remainder = length;
                int chunk = buffer.Length;

                while (remainder > 0)
                {
                    int len = (int) System.Math.Min(chunk, remainder);
                    is_Renamed.ReadBytes(buffer, 0, len, false);
                    os.WriteBytes(buffer, len);
                    remainder -= len;
                    if (checkAbort != null)
                    {
                        // Roughly every 2 MB we will check if
                        // it's time to abort
                        checkAbort.Work(80);
                    }
                }

                // Verify that remainder is 0
                if (remainder != 0)
                    throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");

                // Verify that the output length diff is equal to original file
                long endPtr = os.GetFilePointer();
                long diff = endPtr - startPtr;
                if (diff != length)
                    throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
            }
            finally
            {
                if (is_Renamed != null)
                    is_Renamed.Close();
            }
        }
        /* Walk through all unique text tokens (Posting
         * instances) found in this field and serialize them
         * into a single RAM segment. */
        void AppendPostings(DocumentsWriter.FlushState flushState,
                            FreqProxTermsWriterPerField[] fields,
                            TermInfosWriter termsOut,
                            IndexOutput freqOut,
                            IndexOutput proxOut,
                            DefaultSkipListWriter skipListWriter)
        {
            int fieldNumber = fields[0].fieldInfo.number;
            int numFields   = fields.Length;

            FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];

            for (int i = 0; i < numFields; i++)
            {
                FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);

                System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields[0].fieldInfo);

                // Should always be true
                bool result = fms.nextTerm();
                System.Diagnostics.Debug.Assert(result);
            }

            int  skipInterval       = termsOut.skipInterval;
            bool currentFieldOmitTf = fields[0].fieldInfo.omitTf;

            // If current field omits tf then it cannot store
            // payloads.  We silently drop the payloads in this case:
            bool currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads;

            FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];

            while (numFields > 0)
            {
                // Get the next term to merge
                termStates[0] = mergeStates[0];
                int numToMerge = 1;

                for (int i = 1; i < numFields; i++)
                {
                    char[] text       = mergeStates[i].text;
                    int    textOffset = mergeStates[i].textOffset;
                    int    cmp        = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);

                    if (cmp < 0)
                    {
                        termStates[0] = mergeStates[i];
                        numToMerge    = 1;
                    }
                    else if (cmp == 0)
                    {
                        termStates[numToMerge++] = mergeStates[i];
                    }
                }

                int df = 0;
                int lastPayloadLength = -1;

                int lastDoc = 0;

                char[] text_Renamed = termStates[0].text;
                int    start        = termStates[0].textOffset;

                long freqPointer = freqOut.GetFilePointer();
                long proxPointer;
                if (proxOut != null)
                {
                    proxPointer = proxOut.GetFilePointer();
                }
                else
                {
                    proxPointer = 0;
                }

                skipListWriter.ResetSkip();

                // termStates now holds numToMerge FieldMergeStates, all
                // positioned on the same term; next, interleave their
                // docID streams.
                while (numToMerge > 0)
                {
                    // Every skipInterval-th document, buffer a skip entry:
                    if ((++df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    // Pick the term-state with the smallest pending docID:
                    FreqProxFieldMergeState minState = termStates[0];
                    for (int i = 1; i < numToMerge; i++)
                    {
                        if (termStates[i].docID < minState.docID)
                        {
                            minState = termStates[i];
                        }
                    }

                    int doc         = minState.docID;
                    int termDocFreq = minState.termFreq;

                    System.Diagnostics.Debug.Assert(doc < flushState.numDocsInRAM);
                    System.Diagnostics.Debug.Assert(doc > lastDoc || df == 1);

                    ByteSliceReader prox = minState.prox;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.
                    if (!currentFieldOmitTf)
                    {
                        // omitTf == false so we do write positions & payload
                        System.Diagnostics.Debug.Assert(proxOut != null);
                        for (int j = 0; j < termDocFreq; j++)
                        {
                            int code = prox.ReadVInt();
                            if (currentFieldStorePayloads)
                            {
                                int payloadLength;
                                if ((code & 1) != 0)
                                {
                                    // This position has a payload
                                    payloadLength = prox.ReadVInt();
                                }
                                else
                                {
                                    payloadLength = 0;
                                }
                                // Write the payload length only when it changes;
                                // the low bit of the code flags that a length follows:
                                if (payloadLength != lastPayloadLength)
                                {
                                    proxOut.WriteVInt(code | 1);
                                    proxOut.WriteVInt(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                {
                                    proxOut.WriteVInt(code & (~1));
                                }
                                if (payloadLength > 0)
                                {
                                    copyBytes(prox, proxOut, payloadLength);
                                }
                            }
                            else
                            {
                                System.Diagnostics.Debug.Assert(0 == (code & 1));
                                proxOut.WriteVInt(code >> 1);
                            }
                        } // end for each position

                        // Doc delta shifted left one bit; the low bit will
                        // flag the freq == 1 case below:
                        int newDocCode = (doc - lastDoc) << 1;

                        if (1 == termDocFreq)
                        {
                            freqOut.WriteVInt(newDocCode | 1);
                        }
                        else
                        {
                            freqOut.WriteVInt(newDocCode);
                            freqOut.WriteVInt(termDocFreq);
                        }
                    }
                    else
                    {
                        // omitTf==true: we store only the docs, without
                        // term freq, positions, payloads
                        freqOut.WriteVInt(doc - lastDoc);
                    }

                    lastDoc = doc;

                    if (!minState.nextDoc())
                    {
                        // Remove from termStates
                        int upto = 0;
                        for (int i = 0; i < numToMerge; i++)
                        {
                            if (termStates[i] != minState)
                            {
                                termStates[upto++] = termStates[i];
                            }
                        }
                        numToMerge--;
                        System.Diagnostics.Debug.Assert(upto == numToMerge);

                        // Advance this state to the next term

                        if (!minState.nextTerm())
                        {
                            // OK, no more terms, so remove from mergeStates
                            // as well
                            upto = 0;
                            for (int i = 0; i < numFields; i++)
                            {
                                if (mergeStates[i] != minState)
                                {
                                    mergeStates[upto++] = mergeStates[i];
                                }
                            }
                            numFields--;
                            System.Diagnostics.Debug.Assert(upto == numFields);
                        }
                    }
                }

                System.Diagnostics.Debug.Assert(df > 0);

                // Done merging this term

                long skipPointer = skipListWriter.WriteSkip(freqOut);

                // Write term
                termInfo.Set(df, freqPointer, proxPointer, (int)(skipPointer - freqPointer));

                // TODO: we could do this incrementally
                UnicodeUtil.UTF16toUTF8(text_Renamed, start, termsUTF8);

                // TODO: we could save O(n) re-scan of the term by
                // computing the shared prefix with the last term
                // while doing the UTF8 encoding
                termsOut.Add(fieldNumber,
                             termsUTF8.result,
                             termsUTF8.length,
                             termInfo);
            }
        }
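
The freq-stream writes above use Lucene's packed doc/freq encoding: the doc delta is shifted left one bit, and the low bit is set when the term frequency is 1, so the common freq == 1 case costs no extra VInt. The round-trip sketch below illustrates just that packing; Encode and the List<int> stand-in for the real VInt stream are illustrative, not Lucene.NET APIs.

// A sketch (not Lucene.NET code) of the doc/freq packing used above:
// delta << 1 with the low bit flagging an implicit freq of 1.
using System;
using System.Collections.Generic;

internal static class DocFreqCodeSketch
{
    internal static void Encode(List<int> vints, int docDelta, int freq)
    {
        if (freq == 1)
        {
            vints.Add((docDelta << 1) | 1); // low bit set: freq == 1, nothing follows
        }
        else
        {
            vints.Add(docDelta << 1);       // low bit clear: explicit freq follows
            vints.Add(freq);
        }
    }

    private static void Main()
    {
        var vints = new List<int>();
        int lastDoc = 0;
        foreach (var (doc, freq) in new[] { (3, 1), (7, 5), (8, 1) })
        {
            Encode(vints, doc - lastDoc, freq);
            lastDoc = doc;
        }
        // Deltas 3, 4, 1 become code 7, then code 8 + freq 5, then code 3.
        Console.WriteLine(string.Join(" ", vints)); // 7 8 5 3
    }
}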