/// <summary>
/// Finishes the current term; numDocs must be > 0.
/// <c>stats.TotalTermFreq</c> will be -1 when term
/// frequencies are omitted for the field.
/// </summary>
/// <param name="text">The term that is being finished.</param>
/// <param name="stats">Statistics gathered for the term.</param>
public abstract void FinishTerm(BytesRef text, TermStats stats);
/// <summary>
/// Finishes the current term; numDocs must be > 0.
/// <c>stats.TotalTermFreq</c> will be -1 when term
/// frequencies are omitted for the field.
/// </summary>
/// <param name="text">The term that is being finished.</param>
/// <param name="stats">Statistics gathered for the term.</param>
public abstract void FinishTerm(BytesRef text, TermStats stats);
/// <summary>
/// Merges every term produced by <paramref name="termsEnum"/> into this consumer.
/// For each term the postings are pulled through the mapping enum that matches
/// <paramref name="indexOptions"/>, handed to the consumer returned by
/// <c>StartTerm</c>, and the term is finished only when the merged
/// <c>DocFreq</c> is greater than zero. Once all terms are consumed,
/// <c>Finish</c> is called with the accumulated totals; the total term
/// frequency is reported as -1 for <c>IndexOptions.DOCS_ONLY</c>.
/// </summary>
/// <param name="mergeState">State of the running merge; supplies the document count and the abort checker.</param>
/// <param name="indexOptions">Index options of the field, selecting which postings enum is used.</param>
/// <param name="termsEnum">Source of the terms to merge; must not be null.</param>
public virtual void Merge(MergeState mergeState, IndexOptions indexOptions, TermsEnum termsEnum)
{
    Debug.Assert(termsEnum != null);

    long totalTermFreqSum = 0;
    long docFreqSum = 0;
    long docFreqSinceAbortCheck = 0;
    FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount);

    if (indexOptions == IndexOptions.DOCS_ONLY)
    {
        if (docsEnum == null)
        {
            docsEnum = new MappingMultiDocsEnum();
        }
        docsEnum.MergeState = mergeState;

        MultiDocsEnum reusableDocs = null;
        for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
        {
            // Passing null for liveDocs is fine here: the mapping enum
            // skips the non-live docs.
            reusableDocs = (MultiDocsEnum)termsEnum.Docs(null, reusableDocs, DocsFlags.NONE);
            if (reusableDocs == null)
            {
                continue;
            }

            docsEnum.Reset(reusableDocs);
            PostingsConsumer consumer = StartTerm(current);
            TermStats termStats = consumer.Merge(mergeState, indexOptions, docsEnum, visitedDocs);
            if (termStats.DocFreq <= 0)
            {
                continue;
            }

            FinishTerm(current, termStats);
            // The doc freq is accumulated as the term-freq total in this branch;
            // the value is replaced by -1 in the final Finish call for DOCS_ONLY.
            totalTermFreqSum += termStats.DocFreq;
            docFreqSinceAbortCheck += termStats.DocFreq;
            docFreqSum += termStats.DocFreq;
            if (docFreqSinceAbortCheck > 60000)
            {
                mergeState.CheckAbort.Work(docFreqSinceAbortCheck / 5.0);
                docFreqSinceAbortCheck = 0;
            }
        }
    }
    else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
    {
        if (docsAndFreqsEnum == null)
        {
            docsAndFreqsEnum = new MappingMultiDocsEnum();
        }
        docsAndFreqsEnum.MergeState = mergeState;

        MultiDocsEnum reusableDocsAndFreqs = null;
        for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
        {
            // Passing null for liveDocs is fine here: the mapping enum
            // skips the non-live docs.
            reusableDocsAndFreqs = (MultiDocsEnum)termsEnum.Docs(null, reusableDocsAndFreqs);
            Debug.Assert(reusableDocsAndFreqs != null);

            docsAndFreqsEnum.Reset(reusableDocsAndFreqs);
            PostingsConsumer consumer = StartTerm(current);
            TermStats termStats = consumer.Merge(mergeState, indexOptions, docsAndFreqsEnum, visitedDocs);
            if (termStats.DocFreq <= 0)
            {
                continue;
            }

            FinishTerm(current, termStats);
            totalTermFreqSum += termStats.TotalTermFreq;
            docFreqSinceAbortCheck += termStats.DocFreq;
            docFreqSum += termStats.DocFreq;
            if (docFreqSinceAbortCheck > 60000)
            {
                mergeState.CheckAbort.Work(docFreqSinceAbortCheck / 5.0);
                docFreqSinceAbortCheck = 0;
            }
        }
    }
    else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        if (postingsEnum == null)
        {
            postingsEnum = new MappingMultiDocsAndPositionsEnum();
        }
        postingsEnum.MergeState = mergeState;

        MultiDocsAndPositionsEnum reusablePositions = null;
        for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
        {
            // Passing null for liveDocs is fine here: the mapping enum
            // skips the non-live docs.
            reusablePositions = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, reusablePositions, DocsAndPositionsFlags.PAYLOADS);
            Debug.Assert(reusablePositions != null);

            postingsEnum.Reset(reusablePositions);
            PostingsConsumer consumer = StartTerm(current);
            TermStats termStats = consumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
            if (termStats.DocFreq <= 0)
            {
                continue;
            }

            FinishTerm(current, termStats);
            totalTermFreqSum += termStats.TotalTermFreq;
            docFreqSinceAbortCheck += termStats.DocFreq;
            docFreqSum += termStats.DocFreq;
            if (docFreqSinceAbortCheck > 60000)
            {
                mergeState.CheckAbort.Work(docFreqSinceAbortCheck / 5.0);
                docFreqSinceAbortCheck = 0;
            }
        }
    }
    else
    {
        Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        if (postingsEnum == null)
        {
            postingsEnum = new MappingMultiDocsAndPositionsEnum();
        }
        postingsEnum.MergeState = mergeState;

        MultiDocsAndPositionsEnum reusablePositions = null;
        for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
        {
            // Passing null for liveDocs is fine here: the mapping enum
            // skips the non-live docs.
            reusablePositions = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, reusablePositions);
            Debug.Assert(reusablePositions != null);

            postingsEnum.Reset(reusablePositions);
            PostingsConsumer consumer = StartTerm(current);
            TermStats termStats = consumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
            if (termStats.DocFreq <= 0)
            {
                continue;
            }

            FinishTerm(current, termStats);
            totalTermFreqSum += termStats.TotalTermFreq;
            docFreqSinceAbortCheck += termStats.DocFreq;
            docFreqSum += termStats.DocFreq;
            if (docFreqSinceAbortCheck > 60000)
            {
                mergeState.CheckAbort.Work(docFreqSinceAbortCheck / 5.0);
                docFreqSinceAbortCheck = 0;
            }
        }
    }

    Finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : totalTermFreqSum, docFreqSum, visitedDocs.Cardinality());
}
/// <summary>
/// Selects a term for indexing once the running <c>count</c> has reached
/// <c>interval</c>; the counter restarts at 1 whenever a term is selected
/// and is incremented otherwise.
/// </summary>
/// <param name="term">The term being written (not inspected here).</param>
/// <param name="stats">Statistics of the term (not inspected here).</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public override bool IsIndexTerm(BytesRef term, TermStats stats)
{
    if (count < interval)
    {
        // Interval not reached yet: just advance the counter.
        count++;
        return false;
    }

    count = 1;
    return true;
}
/// <summary>
/// Called sequentially on every term being written,
/// returning <c>true</c> if this term should be indexed.
/// </summary>
/// <param name="term">The term being written.</param>
/// <param name="stats">Statistics of the term being written.</param>
/// <returns><c>true</c> if this term should be indexed; otherwise <c>false</c>.</returns>
public abstract bool IsIndexTerm(BytesRef term, TermStats stats);
/// <summary>
/// Adds an index entry for <paramref name="text"/>. The term is temporarily
/// truncated to the length returned by <c>IndexedTermPrefixLength</c> while it
/// is added to <c>fstBuilder</c> together with <paramref name="termsFilePointer"/>;
/// its length is restored afterwards and the full term is copied into
/// <c>lastTerm</c>. An empty term is skipped.
/// </summary>
/// <param name="text">The term to add; its <c>Length</c> is modified and restored during the call.</param>
/// <param name="stats">Statistics of the term (not used here).</param>
/// <param name="termsFilePointer">File pointer stored as the entry's output.</param>
public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
{
    if (text.Length == 0)
    {
        // We already added empty string in ctor
        Debug.Assert(termsFilePointer == startTermsFilePointer);
        return;
    }

    int originalLength = text.Length;
    int prefixLength = IndexedTermPrefixLength(lastTerm, text);

    // Only the prefix is handed to the builder; the caller's length is always
    // restored, even if adding fails.
    text.Length = prefixLength;
    try
    {
        fstBuilder.Add(Util.ToIntsRef(text, scratchIntsRef), termsFilePointer);
    }
    finally
    {
        text.Length = originalLength;
    }

    lastTerm.CopyBytes(text);
}
/// <summary>
/// Decides whether <paramref name="text"/> should be written as an index term.
/// The term is indexed when the selection policy accepts it or when it is the
/// first term checked since <c>first</c> was last set. A rejected term is
/// copied into <c>lastTerm</c>.
/// </summary>
/// <param name="text">The term being written.</param>
/// <param name="stats">Statistics of the term, forwarded to the policy.</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public override bool CheckIndexTerm(BytesRef text, TermStats stats)
{
    // NOTE: we must force the first term per field to be
    // indexed, in case policy doesn't. The policy is deliberately
    // evaluated before the 'first' flag so that it is consulted for
    // every term (the selectors visible in this file keep a running count).
    if (policy.IsIndexTerm(text, stats) || first)
    {
        first = false;
        return true;
    }
    else
    {
        lastTerm.CopyBytes(text);
        return false;
    }
}
/// <summary>
/// Selects a term for indexing when its <c>DocFreq</c> is at least
/// <c>docFreqThresh</c>, or when the running <c>count</c> has reached
/// <c>interval</c>. The counter restarts at 1 whenever a term is selected
/// and is incremented otherwise.
/// </summary>
/// <param name="term">The term being written (not inspected here).</param>
/// <param name="stats">Statistics of the term; only <c>DocFreq</c> is read.</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public override bool IsIndexTerm(BytesRef term, TermStats stats)
{
    if (stats.DocFreq < docFreqThresh && count < interval)
    {
        // Neither the doc-freq threshold nor the interval was reached.
        count++;
        return false;
    }

    count = 1;
    return true;
}