예제 #1
0
 /// <summary>
 /// Finishes the current term; numDocs must be &gt; 0.
 /// <c>stats.TotalTermFreq</c> will be -1 when term
 /// frequencies are omitted for the field.
 /// </summary>
 public abstract void FinishTerm(BytesRef text, TermStats stats);
예제 #2
0
 /// <summary>
 /// Finishes the current term; numDocs must be > 0.
 ///  <code>stats.totalTermFreq</code> will be -1 when term
 ///  frequencies are omitted for the field.
 /// </summary>
 public abstract void FinishTerm(BytesRef text, TermStats stats);
예제 #3
0
        public virtual void Merge(MergeState mergeState, IndexOptions indexOptions, TermsEnum termsEnum)
        {
            BytesRef term;

            Debug.Assert(termsEnum != null);
            long        sumTotalTermFreq         = 0;
            long        sumDocFreq               = 0;
            long        sumDFsinceLastAbortCheck = 0;
            FixedBitSet visitedDocs              = new FixedBitSet(mergeState.SegmentInfo.DocCount);

            if (indexOptions == IndexOptions.DOCS_ONLY)
            {
                if (docsEnum == null)
                {
                    docsEnum = new MappingMultiDocsEnum();
                }
                docsEnum.MergeState = mergeState;

                MultiDocsEnum docsEnumIn = null;

                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, DocsFlags.NONE);
                    if (docsEnumIn != null)
                    {
                        docsEnum.Reset(docsEnumIn);
                        PostingsConsumer postingsConsumer = StartTerm(term);
                        TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, docsEnum, visitedDocs);
                        if (stats.DocFreq > 0)
                        {
                            FinishTerm(term, stats);
                            sumTotalTermFreq         += stats.DocFreq;
                            sumDFsinceLastAbortCheck += stats.DocFreq;
                            sumDocFreq += stats.DocFreq;
                            if (sumDFsinceLastAbortCheck > 60000)
                            {
                                mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                                sumDFsinceLastAbortCheck = 0;
                            }
                        }
                    }
                }
            }
            else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
            {
                if (docsAndFreqsEnum == null)
                {
                    docsAndFreqsEnum = new MappingMultiDocsEnum();
                }
                docsAndFreqsEnum.MergeState = mergeState;

                MultiDocsEnum docsAndFreqsEnumIn = null;

                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn);
                    Debug.Assert(docsAndFreqsEnumIn != null);
                    docsAndFreqsEnum.Reset(docsAndFreqsEnumIn);
                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, docsAndFreqsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq         += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                if (postingsEnum == null)
                {
                    postingsEnum = new MappingMultiDocsAndPositionsEnum();
                }
                postingsEnum.MergeState = mergeState;
                MultiDocsAndPositionsEnum postingsEnumIn = null;
                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsFlags.PAYLOADS);
                    Debug.Assert(postingsEnumIn != null);
                    postingsEnum.Reset(postingsEnumIn);

                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq         += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            else
            {
                Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                if (postingsEnum == null)
                {
                    postingsEnum = new MappingMultiDocsAndPositionsEnum();
                }
                postingsEnum.MergeState = mergeState;
                MultiDocsAndPositionsEnum postingsEnumIn = null;
                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn);
                    Debug.Assert(postingsEnumIn != null);
                    postingsEnum.Reset(postingsEnumIn);

                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq         += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            Finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality());
        }
 public override bool IsIndexTerm(BytesRef term, TermStats stats)
 {
     if (count >= interval)
     {
         count = 1;
         return true;
     }
     else
     {
         count++;
         return false;
     }
 }
 /// <summary>
 /// Called sequentially on every term being written
 /// returning true if this term should be indexed
 /// </summary>
 public abstract bool IsIndexTerm(BytesRef term, TermStats stats);
 public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
 {
     if (text.Length == 0)
     {
         // We already added empty string in ctor
         Debug.Assert(termsFilePointer == startTermsFilePointer);
         return;
     }
     int lengthSave = text.Length;
     text.Length = IndexedTermPrefixLength(lastTerm, text);
     try
     {
         fstBuilder.Add(Util.ToIntsRef(text, scratchIntsRef), termsFilePointer);
     }
     finally
     {
         text.Length = lengthSave;
     }
     lastTerm.CopyBytes(text);
 }
 public override bool CheckIndexTerm(BytesRef text, TermStats stats)
 {
     //System.out.println("VGW: index term=" + text.utf8ToString());
     // NOTE: we must force the first term per field to be
     // indexed, in case policy doesn't:
     if (policy.isIndexTerm(text, stats) || first)
     {
         first = false;
         //System.out.println("  YES");
         return true;
     }
     else
     {
         lastTerm.CopyBytes(text);
         return false;
     }
 }
 public override bool IsIndexTerm(BytesRef term, TermStats stats)
 {
     if (stats.DocFreq >= docFreqThresh || count >= interval)
     {
         count = 1;
         return true;
     }
     else
     {
         count++;
         return false;
     }
 }