internal override void closeDocStore(DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    initFieldsWriter();
                    fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID    = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);

                    string fdtFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION;
                    string fdxFile = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    state.flushedFiles[fdtFile] = fdtFile;
                    state.flushedFiles[fdxFile] = fdxFile;

                    state.docWriter.RemoveOpenFile(fdtFile);
                    state.docWriter.RemoveOpenFile(fdxFile);

                    if (4 + state.numDocsInStore * 8 != state.directory.FileLength(fdxFile))
                    {
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fdxFile) + " length in bytes of " + fdxFile);
                    }
                }
            }
        }
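The size check above encodes the .fdx layout: a 4-byte format header followed by one 8-byte pointer into the .fdt file per stored document. A minimal sketch of that invariant as a helper (ExpectedFdxLength is hypothetical, not part of Lucene.Net):

        // Sketch only: expected .fdx length = 4-byte header + 8 bytes per doc.
        internal static long ExpectedFdxLength(int numDocsInStore)
        {
            const int headerBytes = 4;   // format header
            const int bytesPerDoc = 8;   // one long pointer into the .fdt file
            return headerBytes + (long)numDocsInStore * bytesPerDoc;
        }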
Example #2
        internal override void flush(IDictionary <object, object> threadsAndFields, DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                if (tvx != null)
                {
                    if (state.numDocsInStore > 0)
                    {
                        // In case there are some documents that we
                        // didn't see (because they hit a non-aborting exception):
                        fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                    }

                    tvx.Flush();
                    tvd.Flush();
                    tvf.Flush();
                }

                IEnumerator <KeyValuePair <object, object> > it = threadsAndFields.GetEnumerator();
                while (it.MoveNext())
                {
                    KeyValuePair <object, object> entry = (KeyValuePair <object, object>)it.Current;
                    IEnumerator <object>          it2   = ((ICollection <object>)entry.Value).GetEnumerator();
                    while (it2.MoveNext())
                    {
                        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)it2.Current;
                        perField.termsHashPerField.reset();
                        perField.shrinkHash();
                    }

                    TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread)entry.Key;
                    perThread.termsHashPerThread.reset(true);
                }
            }
        }
Example #3
        internal override void closeDocStore(DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                if (tvx != null)
                {
                    // At least one doc in this run had term vectors
                    // enabled
                    fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                    tvx.Close();
                    tvf.Close();
                    tvd.Close();
                    tvx = null;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
                    string tvxFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
                    string tvfFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
                    string tvdFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;

                    if (4 + state.numDocsInStore * 16 != state.directory.FileLength(tvxFile))
                    {
                        throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(tvxFile) + " length in bytes of " + tvxFile);
                    }

                    state.flushedFiles[tvxFile] = tvxFile;
                    state.flushedFiles[tvfFile] = tvfFile;
                    state.flushedFiles[tvdFile] = tvdFile;

                    docWriter.RemoveOpenFile(tvxFile);
                    docWriter.RemoveOpenFile(tvfFile);
                    docWriter.RemoveOpenFile(tvdFile);

                    lastDocID = 0;
                }
            }
        }
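The analogous invariant here is 16 bytes per document in the .tvx file: after the 4-byte header, each doc gets one long pointer into .tvd and one into .tvf. A hedged sketch (ExpectedTvxLength is a hypothetical helper):

        // Sketch only: expected .tvx length = 4-byte header + 16 bytes per doc
        // (a long pointer into .tvd plus a long pointer into .tvf).
        internal static long ExpectedTvxLength(int numDocsInStore)
        {
            return 4 + (long)numDocsInStore * 16;
        }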
Example #4
 internal override void closeDocStore(DocumentsWriter.FlushState state)
 {
     try
     {
         one.closeDocStore(state);
     }
     finally
     {
         two.closeDocStore(state);
     }
 }
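The try/finally above guarantees the second consumer's doc store is closed even when the first throws. With more than two consumers the same idea nests; a sketch with three hypothetical consumers a, b, and c:

 internal void closeAllDocStores(DocumentsWriter.FlushState state)
 {
     try
     {
         a.closeDocStore(state);
     }
     finally
     {
         try
         {
             b.closeDocStore(state);    // runs even if a threw
         }
         finally
         {
             c.closeDocStore(state);    // runs even if a or b threw
         }
     }
 }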
Example #5
 internal override void closeDocStore(DocumentsWriter.FlushState state)
 {
     lock (this)
     {
         consumer.closeDocStore(state);
         if (nextTermsHash != null)
         {
             nextTermsHash.closeDocStore(state);
         }
     }
 }
        /// <summary>
        /// If there are fields we've seen but did not see again in the last run, then free them up.
        /// </summary>
        /// <param name="state"></param>
        internal void trimFields(DocumentsWriter.FlushState state)
        {
            for (int i = 0; i < fieldHash.Length; i++)
            {
                DocFieldProcessorPerField perField     = fieldHash[i];
                DocFieldProcessorPerField lastPerField = null;

                while (perField != null)
                {
                    if (perField.lastGen == -1)
                    {
                        // This field was not seen since the previous
                        // flush, so, free up its resources now

                        // Unhash
                        if (lastPerField == null)
                        {
                            fieldHash[i] = perField.next;
                        }
                        else
                        {
                            lastPerField.next = perField.next;
                        }

                        if (state.docWriter.infoStream != null)
                        {
                            state.docWriter.infoStream.WriteLine("  purge field=" + perField.fieldInfo.name);
                        }

                        totalFieldCount--;
                    }
                    else
                    {
                        // Reset
                        perField.lastGen = -1;
                        lastPerField     = perField;
                    }

                    perField = perField.next;
                }
            }
        }
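The "unhash" step above is the standard removal from a singly linked hash bucket: when the stale entry is the bucket head, the head pointer advances; otherwise the previous entry's next pointer is spliced past it. A minimal standalone sketch with a hypothetical Node type:

        // Sketch only: prune chain entries whose lastGen was never bumped,
        // resetting the survivors for the next generation.
        internal sealed class Node
        {
            internal Node next;
            internal int lastGen = -1;
        }

        internal static Node RemoveStale(Node head)
        {
            Node prev = null, cur = head;
            while (cur != null)
            {
                if (cur.lastGen == -1)
                {
                    if (prev == null) head = cur.next;   // unlink bucket head
                    else prev.next = cur.next;           // splice past cur
                }
                else
                {
                    cur.lastGen = -1;                    // reset for next run
                    prev = cur;
                }
                cur = cur.next;
            }
            return head;
        }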
        internal override void Flush(ICollection <object> threads, DocumentsWriter.FlushState state)
        {
            IDictionary <object, ICollection <object> > childThreadsAndFields = new Dictionary <object, ICollection <object> >();
            IEnumerator <object> it = threads.GetEnumerator();

            while (it.MoveNext())
            {
                DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)it.Current;
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.trimFields(state);
            }

            consumer.flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            fieldInfos.Write(state.directory, state.segmentName + ".fnm");
        }
        /// <summary>
        /// Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary <object, object> byField = new Dictionary <object, object>();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            IEnumerator <KeyValuePair <object, ICollection <object> > > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, ICollection <object> > entry = it.Current;

                ICollection <object> fields         = entry.Value;
                IEnumerator <object> fieldsIt       = fields.GetEnumerator();
                List <object>        fieldsToRemove = new List <object>(fields.Count);

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)fieldsIt.Current;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        IList <object> l;
                        if (byField.ContainsKey(perField.fieldInfo))
                        {
                            l = (IList <object>)byField[perField.fieldInfo];
                        }
                        else
                        {
                            l = new List <object>();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    else
                    {
                        // Remove this field since we haven't seen it
                        // since the previous flush
                        fieldsToRemove.Add(perField);
                    }
                }
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fields.Remove(fieldsToRemove[i]);
                }
            }

            string normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;

            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    List <object> toMerge;
                    int           upto = 0;
                    if (byField.ContainsKey(fieldInfo))
                    {
                        toMerge = (List <object>)byField[fieldInfo];

                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos[0] < fields[0].docIDs.Length, " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocsInRAM);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocsInRAM; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocsInRAM == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocsInRAM) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
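The merge loop above interleaves the per-thread norm arrays in docID order and writes defaultNorm into every gap, so the output is exactly one byte per document per normed field. A minimal sketch of that hole-filling idea, assuming a single sorted (docID, norm) list:

        // Sketch only: densify sparse (docID, norm) pairs into one byte per
        // doc, filling holes (and the tail) with defaultNorm.
        internal static byte[] DenseNorms(int[] docIDs, byte[] norms, int numDocs, byte defaultNorm)
        {
            byte[] outNorms = new byte[numDocs];
            int upto = 0;
            for (int i = 0; i < docIDs.Length; i++)
            {
                while (upto < docIDs[i])
                {
                    outNorms[upto++] = defaultNorm;   // fill hole before this doc
                }
                outNorms[upto++] = norms[i];
            }
            while (upto < numDocs)
            {
                outNorms[upto++] = defaultNorm;       // fill final hole
            }
            return outNorms;
        }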
 internal override void closeDocStore(DocumentsWriter.FlushState state)
 {
 }
Example #10
 
 internal abstract class DocConsumer
 {
     internal abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread);
     internal abstract void Flush(System.Collections.Generic.ICollection <object> threads, DocumentsWriter.FlushState state);
     internal abstract void closeDocStore(DocumentsWriter.FlushState state);
     internal abstract void abort();
     internal abstract bool freeRAM();
 }

Example #11
using System.Collections.Generic;

namespace Lucene.Net.Index
{
    internal abstract class DocFieldConsumer
    {

        internal FieldInfos fieldInfos;

        /// <summary>
        /// Called when DocumentsWriter decides to create a new
        /// segment
        /// </summary>
        internal abstract void flush(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state);

        /// <summary>
        /// Called when DocumentsWriter decides to close the doc
        /// stores
        /// </summary>
        internal abstract void closeDocStore(DocumentsWriter.FlushState state);

        /// <summary>
        /// Called when an aborting exception is hit
        /// </summary>
        internal abstract void Abort();

        /// <summary>
        /// Add a new thread
        /// </summary>
        internal abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread);

        /// <summary>
        /// Called when DocumentsWriter is using too much RAM.
        /// The consumer should free RAM, if possible, returning
        /// true if any RAM was in fact freed.
        /// </summary>
        internal abstract bool freeRAM();

        internal virtual void setFieldInfos(FieldInfos fieldInfos)
        {
            this.fieldInfos = fieldInfos;
        }
    }
}

 /// <summary>  Close doc stores </summary>
 internal abstract void closeDocStore(DocumentsWriter.FlushState state);
 /// <summary>  Flush a new segment </summary>
 internal abstract void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state);
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                if (state.numDocsInStore > 0)
                {
                    // It's possible that all documents seen in this segment
                    // hit non-aborting exceptions, in which case we will
                    // not have yet init'd the FieldsWriter:
                    initFieldsWriter();

                    // Fill fdx file to include any final docs that we
                    // skipped because they hit non-aborting exceptions
                    fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Flush();
                }
            }
        }
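Both paths above rely on fill to "catch up" the doc store for documents that stored no fields. A hedged sketch of what fill presumably does (SkipDocument is assumed to write the .fdx pointer plus a zero field count, keeping doc numbering dense):

        // Sketch only, under the assumptions stated above: advance lastDocID
        // to the target, writing one empty stored-fields entry per skipped doc.
        private void fill(int docID)
        {
            while (lastDocID < docID)
            {
                fieldsWriter.SkipDocument();
                lastDocID++;
            }
        }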
Example #15
 internal override void closeDocStore(DocumentsWriter.FlushState state)
 {
     consumer.closeDocStore(state);
     endConsumer.closeDocStore(state);
 }
Example #16
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary <object, ICollection <object> > oneThreadsAndFields = new Dictionary <object, ICollection <object> >();
            IDictionary <object, ICollection <object> > twoThreadsAndFields = new Dictionary <object, ICollection <object> >();

            IEnumerator <KeyValuePair <object, ICollection <object> > > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, ICollection <object> > entry = it.Current;

                DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread)entry.Key;

                ICollection <object> fields = entry.Value;

                IEnumerator <object> fieldsIt = fields.GetEnumerator();

                IDictionary <object, object> oneFields = new Dictionary <object, object>();
                IDictionary <object, object> twoFields = new Dictionary <object, object>();

                while (fieldsIt.MoveNext())
                {
                    DocFieldConsumersPerField perField = (DocFieldConsumersPerField)fieldsIt.Current;
                    oneFields[perField.one] = perField.one;
                    twoFields[perField.two] = perField.two;
                }

                oneThreadsAndFields[perThread.one] = oneFields.Keys;
                twoThreadsAndFields[perThread.two] = twoFields.Keys;
            }

            one.flush(oneThreadsAndFields, state);
            two.flush(twoThreadsAndFields, state);
        }
Example #17
        internal void shrinkFreePostings(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

            int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);

            if (newSize != postingsFreeList.Length)
            {
                RawPostingList[] newArray = new RawPostingList[newSize];
                System.Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
                postingsFreeList = newArray;
            }
        }
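ArrayUtil.GetShrinkSize decides whether the free list is oversized relative to the live allocation count; only when it returns a smaller length is a new array allocated and the live entries copied over. A hedged sketch of one plausible heuristic in that spirit (an illustration, not necessarily Lucene.Net's exact rule):

        // Sketch only: shrink when the target fits in well under half the
        // current capacity, keeping modest headroom to avoid reallocation churn.
        internal static int ShrinkSize(int currentLength, int targetCount)
        {
            int newLength = targetCount + (targetCount / 8) + 3;
            return newLength < currentLength / 2 ? newLength : currentLength;
        }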
Example #18
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            lock (this)
            {
                IDictionary <object, object> childThreadsAndFields = new Dictionary <object, object>();
                IDictionary <object, ICollection <object> > nextThreadsAndFields;

                if (nextTermsHash != null)
                {
                    nextThreadsAndFields = new Dictionary <object, ICollection <object> >();
                }
                else
                {
                    nextThreadsAndFields = null;
                }

                IEnumerator <KeyValuePair <object, ICollection <object> > > it = threadsAndFields.GetEnumerator();
                while (it.MoveNext())
                {
                    KeyValuePair <object, ICollection <object> > entry = it.Current;

                    TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;

                    ICollection <object> fields = entry.Value;

                    IEnumerator <object>         fieldsIt    = fields.GetEnumerator();
                    IDictionary <object, object> childFields = new Dictionary <object, object>();
                    IDictionary <object, object> nextChildFields;

                    if (nextTermsHash != null)
                    {
                        nextChildFields = new Dictionary <object, object>();
                    }
                    else
                    {
                        nextChildFields = null;
                    }

                    while (fieldsIt.MoveNext())
                    {
                        TermsHashPerField perField = (TermsHashPerField)fieldsIt.Current;
                        childFields[perField.consumer] = perField.consumer;
                        if (nextTermsHash != null)
                        {
                            nextChildFields[perField.nextPerField] = perField.nextPerField;
                        }
                    }

                    childThreadsAndFields[perThread.consumer] = childFields.Keys;
                    if (nextTermsHash != null)
                    {
                        nextThreadsAndFields[perThread.nextPerThread] = nextChildFields.Keys;
                    }
                }

                consumer.flush(childThreadsAndFields, state);

                shrinkFreePostings(threadsAndFields, state);

                if (nextTermsHash != null)
                {
                    nextTermsHash.flush(nextThreadsAndFields, state);
                }
            }
        }
        /* Walk through all unique text tokens (Posting
         * instances) found in this field and serialize them
         * into a single RAM segment. */
        void AppendPostings(DocumentsWriter.FlushState flushState,
                            FreqProxTermsWriterPerField[] fields,
                            TermInfosWriter termsOut,
                            IndexOutput freqOut,
                            IndexOutput proxOut,
                            DefaultSkipListWriter skipListWriter)
        {
            int fieldNumber = fields[0].fieldInfo.number;
            int numFields   = fields.Length;

            FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];

            for (int i = 0; i < numFields; i++)
            {
                FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);

                System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields[0].fieldInfo);

                // Should always be true
                bool result = fms.nextTerm();
                System.Diagnostics.Debug.Assert(result);
            }

            int  skipInterval       = termsOut.skipInterval;
            bool currentFieldOmitTf = fields[0].fieldInfo.omitTf;

            // If current field omits tf then it cannot store
            // payloads.  We silently drop the payloads in this case:
            bool currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads;

            FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];

            while (numFields > 0)
            {
                // Get the next term to merge
                termStates[0] = mergeStates[0];
                int numToMerge = 1;

                for (int i = 1; i < numFields; i++)
                {
                    char[] text       = mergeStates[i].text;
                    int    textOffset = mergeStates[i].textOffset;
                    int    cmp        = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);

                    if (cmp < 0)
                    {
                        termStates[0] = mergeStates[i];
                        numToMerge    = 1;
                    }
                    else if (cmp == 0)
                    {
                        termStates[numToMerge++] = mergeStates[i];
                    }
                }

                int df = 0;
                int lastPayloadLength = -1;

                int lastDoc = 0;

                char[] text_Renamed = termStates[0].text;
                int    start        = termStates[0].textOffset;

                long freqPointer = freqOut.GetFilePointer();
                long proxPointer;
                if (proxOut != null)
                {
                    proxPointer = proxOut.GetFilePointer();
                }
                else
                {
                    proxPointer = 0;
                }

                skipListWriter.ResetSkip();

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                while (numToMerge > 0)
                {
                    if ((++df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    FreqProxFieldMergeState minState = termStates[0];
                    for (int i = 1; i < numToMerge; i++)
                    {
                        if (termStates[i].docID < minState.docID)
                        {
                            minState = termStates[i];
                        }
                    }

                    int doc         = minState.docID;
                    int termDocFreq = minState.termFreq;

                    System.Diagnostics.Debug.Assert(doc < flushState.numDocsInRAM);
                    System.Diagnostics.Debug.Assert(doc > lastDoc || df == 1);

                    ByteSliceReader prox = minState.prox;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.
                    if (!currentFieldOmitTf)
                    {
                        // omitTf == false so we do write positions & payload
                        System.Diagnostics.Debug.Assert(proxOut != null);
                        for (int j = 0; j < termDocFreq; j++)
                        {
                            int code = prox.ReadVInt();
                            if (currentFieldStorePayloads)
                            {
                                int payloadLength;
                                if ((code & 1) != 0)
                                {
                                    // This position has a payload
                                    payloadLength = prox.ReadVInt();
                                }
                                else
                                {
                                    payloadLength = 0;
                                }
                                if (payloadLength != lastPayloadLength)
                                {
                                    proxOut.WriteVInt(code | 1);
                                    proxOut.WriteVInt(payloadLength);
                                    lastPayloadLength = payloadLength;
                                }
                                else
                                {
                                    proxOut.WriteVInt(code & (~1));
                                }
                                if (payloadLength > 0)
                                {
                                    copyBytes(prox, proxOut, payloadLength);
                                }
                            }
                            else
                            {
                                System.Diagnostics.Debug.Assert(0 == (code & 1));
                                proxOut.WriteVInt(code >> 1);
                            }
                        } //End for

                        int newDocCode = (doc - lastDoc) << 1;

                        if (1 == termDocFreq)
                        {
                            freqOut.WriteVInt(newDocCode | 1);
                        }
                        else
                        {
                            freqOut.WriteVInt(newDocCode);
                            freqOut.WriteVInt(termDocFreq);
                        }
                    }
                    else
                    {
                        // omitTf==true: we store only the docs, without
                        // term freq, positions, payloads
                        freqOut.WriteVInt(doc - lastDoc);
                    }

                    lastDoc = doc;

                    if (!minState.nextDoc())
                    {
                        // Remove from termStates
                        int upto = 0;
                        for (int i = 0; i < numToMerge; i++)
                        {
                            if (termStates[i] != minState)
                            {
                                termStates[upto++] = termStates[i];
                            }
                        }
                        numToMerge--;
                        System.Diagnostics.Debug.Assert(upto == numToMerge);

                        // Advance this state to the next term

                        if (!minState.nextTerm())
                        {
                            // OK, no more terms, so remove from mergeStates
                            // as well
                            upto = 0;
                            for (int i = 0; i < numFields; i++)
                            {
                                if (mergeStates[i] != minState)
                                {
                                    mergeStates[upto++] = mergeStates[i];
                                }
                            }
                            numFields--;
                            System.Diagnostics.Debug.Assert(upto == numFields);
                        }
                    }
                }

                System.Diagnostics.Debug.Assert(df > 0);

                // Done merging this term

                long skipPointer = skipListWriter.WriteSkip(freqOut);

                // Write term
                termInfo.Set(df, freqPointer, proxPointer, (int)(skipPointer - freqPointer));

                // TODO: we could do this incrementally
                UnicodeUtil.UTF16toUTF8(text_Renamed, start, termsUTF8);

                // TODO: we could save O(n) re-scan of the term by
                // computing the shared prefix with the last term
                // during the UTF8 encoding
                termsOut.Add(fieldNumber,
                             termsUTF8.result,
                             termsUTF8.length,
                             termInfo);
            }
        }
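The freq-stream encoding above uses a delta plus a low-bit flag: the doc delta is shifted left one bit and the low bit is set when termDocFreq == 1, so the common tf=1 case costs no second VInt. A minimal sketch isolating that step (WriteVInt is the usual IndexOutput variable-length int):

        // Sketch only: encode one (doc, termFreq) entry into the freq stream.
        static void WriteFreqEntry(IndexOutput freqOut, int doc, int lastDoc, int termDocFreq)
        {
            int code = (doc - lastDoc) << 1;   // doc delta, low bit reserved
            if (termDocFreq == 1)
            {
                freqOut.WriteVInt(code | 1);   // tf==1 folded into the flag bit
            }
            else
            {
                freqOut.WriteVInt(code);       // flag clear: explicit tf follows
                freqOut.WriteVInt(termDocFreq);
            }
        }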
Example #20
        internal override void flush(IDictionary <object, ICollection <object> > threadsAndFields, DocumentsWriter.FlushState state)
        {
            IDictionary <object, ICollection <object> > childThreadsAndFields    = new Dictionary <object, ICollection <object> >();
            IDictionary <object, ICollection <object> > endChildThreadsAndFields = new Dictionary <object, ICollection <object> >();

            IEnumerator <KeyValuePair <object, ICollection <object> > > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, ICollection <object> > entry = it.Current;

                DocInverterPerThread perThread = (DocInverterPerThread)entry.Key;

                ICollection <object> fields = entry.Value;
                IEnumerator <object> fieldsIt = fields.GetEnumerator();

                IDictionary <object, object> childFields    = new Dictionary <object, object>();
                IDictionary <object, object> endChildFields = new Dictionary <object, object>();

                while (fieldsIt.MoveNext())
                {
                    DocInverterPerField perField = (DocInverterPerField)fieldsIt.Current;
                    childFields[perField.consumer]       = perField.consumer;
                    endChildFields[perField.endConsumer] = perField.endConsumer;
                }

                childThreadsAndFields[perThread.consumer] = childFields.Keys;
                // create new collection to provide for deletions in NormsWriter
                endChildThreadsAndFields[perThread.endConsumer] = new List <object>(endChildFields.Keys);
            }

            consumer.flush(childThreadsAndFields, state);
            endConsumer.flush(endChildThreadsAndFields, state);
        }
        // TODO: would be nice to factor out more of this, e.g. the
        // FreqProxFieldMergeState, and the code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        internal override void flush(IDictionary <object, object> threadsAndFields, DocumentsWriter.FlushState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <object> allFields = new List <object>();

            IEnumerator <KeyValuePair <object, object> > it = threadsAndFields.GetEnumerator();

            while (it.MoveNext())
            {
                KeyValuePair <object, object> entry = (KeyValuePair <object, object>)it.Current;

                ICollection <object> fields = (ICollection <object>)entry.Value;

                IEnumerator <object> fieldsIt = fields.GetEnumerator();

                while (fieldsIt.MoveNext())
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)fieldsIt.Current;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            TermInfosWriter termsOut = new TermInfosWriter(state.directory,
                                                           state.segmentName,
                                                           fieldInfos,
                                                           state.docWriter.writer.GetTermIndexInterval());

            IndexOutput freqOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.FREQ_EXTENSION));
            IndexOutput proxOut;

            if (fieldInfos.HasProx())
            {
                proxOut = state.directory.CreateOutput(state.SegmentFileName(IndexFileNames.PROX_EXTENSION));
            }
            else
            {
                proxOut = null;
            }

            DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
                                                                             termsOut.maxSkipLevels,
                                                                             state.numDocsInRAM, freqOut, proxOut);

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                string    fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.reset();
                    perField.shrinkHash(numPostings);
                    fields[i].reset();
                }

                start = end;
            }

            it = threadsAndFields.GetEnumerator();
            while (it.MoveNext())
            {
                KeyValuePair <object, object> entry     = (KeyValuePair <object, object>)it.Current;
                FreqProxTermsWriterPerThread  perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.reset(true);
            }

            freqOut.Close();
            if (proxOut != null)
            {
                state.flushedFiles[state.SegmentFileName(IndexFileNames.PROX_EXTENSION)] = state.SegmentFileName(IndexFileNames.PROX_EXTENSION);
                proxOut.Close();
            }
            termsOut.Close();

            // Record all files we have flushed
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION)] = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.FREQ_EXTENSION)]        = state.SegmentFileName(IndexFileNames.FREQ_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)]       = state.SegmentFileName(IndexFileNames.TERMS_EXTENSION);
            state.flushedFiles[state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)] = state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION);
        }