/// <summary>
/// Converts every phrase in <paramref name="phraseInfoList"/> into a <see cref="SubInfo"/>,
/// sums the phrase boosts, and appends the resulting <see cref="WeightedFragInfo"/> to
/// <c>FragInfos</c>.
/// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>
/// </summary>
/// <param name="startOffset">Start offset passed through to the new fragment.</param>
/// <param name="endOffset">End offset passed through to the new fragment.</param>
/// <param name="phraseInfoList">Phrases that fall inside the fragment.</param>
public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
{
    List<SubInfo> fragmentSubInfos = new List<SubInfo>();
    float boostSum = 0;

    for (int i = 0; i < phraseInfoList.Count; i++)
    {
        WeightedPhraseInfo phrase = phraseInfoList[i];
        SubInfo subInfo = new SubInfo(phrase.GetText(), phrase.TermsOffsets, phrase.Seqnum, phrase.Boost);
        fragmentSubInfos.Add(subInfo);

        // The fragment's boost is the plain sum of the boosts of its phrases.
        boostSum += phrase.Boost;
    }

    WeightedFragInfo fragment = new WeightedFragInfo(startOffset, endOffset, fragmentSubInfos, boostSum);
    FragInfos.Add(fragment);
}
/// <summary>
/// Adds a fragment whose weight is built from the distinct terms it contains, scaled by a
/// length normalization factor.
/// <para>
/// Each phrase contributes <c>term.Weight * phrase.Boost</c> for every term whose text has not
/// already been seen in this fragment; repeated terms add nothing to the boost but still count
/// toward the fragment length.
/// </para>
/// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
/// </summary>
/// <param name="startOffset">Start offset passed through to the new fragment.</param>
/// <param name="endOffset">End offset passed through to the new fragment.</param>
/// <param name="phraseInfoList">Phrases that fall inside the fragment.</param>
public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
{
    List<SubInfo> tempSubInfos = new List<SubInfo>();
    List<SubInfo> realSubInfos = new List<SubInfo>();
    HashSet<string> distinctTerms = new HashSet<string>();
    int length = 0;

    foreach (WeightedPhraseInfo phraseInfo in phraseInfoList)
    {
        float phraseTotalBoost = 0;
        foreach (TermInfo ti in phraseInfo.TermsInfos)
        {
            // Only the first occurrence of a term text contributes to the boost.
            if (distinctTerms.Add(ti.Text))
            {
                phraseTotalBoost += ti.Weight * phraseInfo.Boost;
            }
            // Every term occurrence, distinct or not, counts toward the length.
            length++;
        }
        tempSubInfos.Add(new SubInfo(phraseInfo.GetText(), phraseInfo.TermsOffsets, phraseInfo.Seqnum, phraseTotalBoost));
    }

    // The number of terms per fragment (length) is folded into the weight. Otherwise a
    // one-word query would give every fragment the same weight regardless of how many
    // matching words it contains. To keep fragments with many words from outranking more
    // relevant ones, the length is dampened with a square-root normalization.
    //
    // With no terms at all, the unguarded expression is 0 * (1 / 0) = 0 * Infinity = NaN,
    // which would turn every boost into NaN. Use 0 in that case; for positive lengths the
    // arithmetic is unchanged.
    float norm = length > 0 ? length * (1 / (float)Math.Sqrt(length)) : 0f;

    float totalBoost = 0;
    foreach (SubInfo tempSubInfo in tempSubInfos)
    {
        // A fresh SubInfo is built to carry the normalized boost.
        float subInfoBoost = tempSubInfo.Boost * norm;
        realSubInfos.Add(new SubInfo(tempSubInfo.Text, tempSubInfo.TermsOffsets, tempSubInfo.Seqnum, subInfoBoost));
        totalBoost += subInfoBoost;
    }

    FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, realSubInfos, totalBoost));
}