Esempio n. 1
0
        /// <summary>
        /// Creates one weighted fragment spanning the given offsets and appends it to <c>FragInfos</c>.
        /// Every phrase becomes a <c>SubInfo</c> carrying its own boost; the fragment boost is the
        /// plain sum of all phrase boosts.
        /// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>
        /// </summary>
        /// <param name="startOffset">Start offset passed through to the new fragment.</param>
        /// <param name="endOffset">End offset passed through to the new fragment.</param>
        /// <param name="phraseInfoList">Phrases that make up the fragment.</param>
        public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
        {
            var   entries  = new List<SubInfo>();
            float boostSum = 0;

            foreach (WeightedPhraseInfo phrase in phraseInfoList)
            {
                SubInfo entry = new SubInfo(
                    phrase.GetText(),
                    phrase.TermsOffsets,
                    phrase.Seqnum,
                    phrase.Boost);
                entries.Add(entry);

                // Accumulate in list order so the floating-point sum is reproducible.
                boostSum += phrase.Boost;
            }

            WeightedFragInfo fragment = new WeightedFragInfo(startOffset, endOffset, entries, boostSum);
            FragInfos.Add(fragment);
        }
Esempio n. 2
0
        /// <summary>
        /// Creates one weighted fragment spanning the given offsets and appends it to <c>FragInfos</c>.
        /// <para>
        /// Each phrase's boost is the sum of <c>term weight * phrase boost</c> over the terms that have
        /// not been seen earlier in this fragment (distinct by term text), multiplied by a
        /// length-based normalization factor. The fragment boost is the sum of the phrase boosts.
        /// </para>
        /// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
        /// </summary>
        /// <param name="startOffset">Start offset passed through to the new fragment.</param>
        /// <param name="endOffset">End offset passed through to the new fragment.</param>
        /// <param name="phraseInfoList">Phrases that make up the fragment.</param>
        public override void Add(int startOffset, int endOffset, IList <WeightedPhraseInfo> phraseInfoList)
        {
            List <SubInfo>   tempSubInfos  = new List <SubInfo>();
            HashSet <string> distinctTerms = new HashSet <string>();
            int length = 0;

            foreach (WeightedPhraseInfo phraseInfo in phraseInfoList)
            {
                float phraseTotalBoost = 0;
                foreach (TermInfo ti in phraseInfo.TermsInfos)
                {
                    // Only the first occurrence of a term text contributes to the boost ...
                    if (distinctTerms.Add(ti.Text))
                    {
                        phraseTotalBoost += ti.Weight * phraseInfo.Boost;
                    }
                    // ... but every occurrence counts towards the fragment length.
                    length++;
                }
                tempSubInfos.Add(new SubInfo(phraseInfo.GetText(), phraseInfo.TermsOffsets,
                                             phraseInfo.Seqnum, phraseTotalBoost));
            }

            // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
            // would cause an equal weight for all fragments regardless of how much words they contain.
            // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
            // we "bend" the length with a standard-normalization a little bit.
            //
            // With no terms at all the expression would be 0 * (1 / 0) = 0 * Infinity = NaN, which would
            // poison every sub-info boost and the fragment total. Use 0 in that case so boosts stay 0.
            // The expression for length > 0 is kept as-is so existing scores are bit-for-bit unchanged.
            float norm = length > 0
                ? length * (1 / (float)Math.Sqrt(length))
                : 0f;

            List <SubInfo> realSubInfos = new List <SubInfo>(tempSubInfos.Count);
            float totalBoost = 0;

            foreach (SubInfo tempSubInfo in tempSubInfos)
            {
                float subInfoBoost = tempSubInfo.Boost * norm;
                realSubInfos.Add(new SubInfo(tempSubInfo.Text, tempSubInfo.TermsOffsets,
                                             tempSubInfo.Seqnum, subInfoBoost));
                totalBoost += subInfoBoost;
            }

            FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, realSubInfos, totalBoost));
        }