/// <summary>
/// Called during merging to merge all <see cref="Fields"/> from
/// sub-readers. This must recurse to merge all postings
/// (terms, docs, positions, etc.). A <c>PostingsFormat</c> can
/// override this default implementation to do its own merging.
/// </summary>
/// <param name="mergeState">Merge state; its <c>FieldInfos</c> is used to look up each field by name.</param>
/// <param name="fields">Fields to merge; enumerating it yields the field names.</param>
public virtual void Merge(MergeState mergeState, Fields fields)
{
    foreach (string fieldName in fields)
    {
        FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);
        Debug.Assert(fieldInfo != null, "FieldInfo for field is null: " + fieldName);

        Terms fieldTerms = fields.Terms(fieldName);
        if (fieldTerms == null)
        {
            // Nothing to merge for a field that exposes no terms.
            continue;
        }

        // Open a consumer for this field and let it merge the field's terms.
        AddField(fieldInfo).Merge(mergeState, fieldInfo.FieldIndexOptions, fieldTerms.Iterator(null));
    }
}
/// <summary>
/// Runs <c>VerifyVector</c> over the terms of every field in
/// <paramref name="vectors"/>.
/// </summary>
/// <param name="vectors">Fields to check; enumerating it yields the field names.</param>
/// <param name="num">Value passed through unchanged to <c>VerifyVector</c>.</param>
private void VerifyVectors(Fields vectors, int num)
{
    foreach (string fieldName in vectors)
    {
        Terms fieldTerms = vectors.Terms(fieldName);
        Debug.Assert(fieldTerms != null);

        // Fresh iterator (no enum reuse) over this field's terms.
        var fieldTermsEnum = fieldTerms.Iterator(null);
        VerifyVector(fieldTermsEnum, num);
    }
}
/// <summary>
/// Safe (but, slowish) default method to write every
/// vector field in the document.
/// </summary>
/// <param name="vectors">
/// Term vectors of one document. When <c>null</c>, an empty document
/// (zero fields) is written.
/// </param>
/// <param name="mergeState">Merge state; its <c>FieldInfos</c> is used to look up each field by name.</param>
protected internal void AddAllDocVectors(Fields vectors, MergeState mergeState)
{
    if (vectors == null)
    {
        // No vectors for this document: still emit an (empty) document entry.
        StartDocument(0);
        FinishDocument();
        return;
    }

    int numFields = vectors.Size;
    if (numFields == -1)
    {
        // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
        numFields = 0;
        foreach (string unusedFieldName in vectors)
        {
            numFields++;
        }
    }
    StartDocument(numFields);

    string lastFieldName = null;

    // Both enums are handed back to the producer on each call so they can be reused.
    TermsEnum termsEnum = null;
    DocsAndPositionsEnum docsAndPositionsEnum = null;

    int fieldCount = 0;
    foreach (string fieldName in vectors)
    {
        fieldCount++;

        FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

        // Field names must arrive in strictly increasing order. The comparison is
        // ordinal on purpose: string.CompareTo is culture-sensitive in .NET and can
        // disagree with the ordinal ordering that the original Java compareTo checks.
        Debug.Assert(lastFieldName == null || string.CompareOrdinal(fieldName, lastFieldName) > 0, "lastFieldName=" + lastFieldName + " fieldName=" + fieldName);
        lastFieldName = fieldName;

        Terms terms = vectors.Terms(fieldName);
        if (terms == null)
        {
            // FieldsEnum shouldn't lie...
            continue;
        }

        bool hasPositions = terms.HasPositions();
        bool hasOffsets = terms.HasOffsets();
        bool hasPayloads = terms.HasPayloads();
        Debug.Assert(!hasPayloads || hasPositions);

        int numTerms = (int)terms.Size();
        if (numTerms == -1)
        {
            // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
            numTerms = 0;
            termsEnum = terms.Iterator(termsEnum);
            while (termsEnum.Next() != null)
            {
                numTerms++;
            }
        }

        StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);

        // (Re)position the enum at the start of the field's terms for the real pass.
        termsEnum = terms.Iterator(termsEnum);

        int termCount = 0;
        while (termsEnum.Next() != null)
        {
            termCount++;

            int freq = (int)termsEnum.TotalTermFreq();

            StartTerm(termsEnum.Term(), freq);

            if (hasPositions || hasOffsets)
            {
                docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                Debug.Assert(docsAndPositionsEnum != null);

                // Advance to the first document; the assertions expect it to exist
                // and to report the same frequency as the term itself.
                int docID = docsAndPositionsEnum.NextDoc();
                Debug.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                Debug.Assert(docsAndPositionsEnum.Freq() == freq);

                for (int posUpto = 0; posUpto < freq; posUpto++)
                {
                    int pos = docsAndPositionsEnum.NextPosition();
                    int startOffset = docsAndPositionsEnum.StartOffset();
                    int endOffset = docsAndPositionsEnum.EndOffset();

                    BytesRef payload = docsAndPositionsEnum.Payload;

                    Debug.Assert(!hasPositions || pos >= 0);
                    AddPosition(pos, startOffset, endOffset, payload);
                }
            }
            FinishTerm();
        }
        Debug.Assert(termCount == numTerms);
        FinishField();
    }
    Debug.Assert(fieldCount == numFields);
    FinishDocument();
}