/// <summary>
        /// Score a candidate doc for all slop-valid position-combinations (matches)
        /// encountered while traversing/hopping the PhrasePositions.
        /// <para/> The score contribution of a match depends on the distance:
        /// <para/> - highest score for distance=0 (exact match).
        /// <para/> - score gets lower as distance gets higher.
        /// <para/>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
        /// once for "a b" (distance=0), and once for "b a" (distance=2).
        /// <para/>Possibly not all valid combinations are encountered, because for efficiency
        /// we always propagate the least PhrasePosition. This allows us to rely on a
        /// <see cref="Util.PriorityQueue{T}"/> and move forward faster.
        /// As a result, for example, document "a b c b a"
        /// would score differently for queries "a b c"~4 and "c b a"~4, although
        /// they really are equivalent.
        /// Similarly, for doc "a b c b a f g", query "c b"~2
        /// would get the same score as "g f"~2, although "c b"~2 could be matched twice.
        /// We may want to fix this in the future (currently not, for performance reasons).
        /// </summary>
        private float PhraseFreq()
        {
            if (!InitPhrasePositions())
            {
                // Initialization reported failure: nothing to score for this doc.
                return 0.0f;
            }

            numMatches = 0;
            float total = 0.0f;

            PhrasePositions current  = pq.Pop();
            int             shortest = end - current.position;
            int             nextPos  = pq.Top.position;

            while (AdvancePP(current))
            {
                if (hasRpts && !AdvanceRpts(current))
                {
                    break; // pps exhausted
                }

                if (current.position <= nextPos)
                {
                    // Not yet past the next queued position: keep the smallest
                    // match-length observed so far for the current match.
                    int candidate = end - current.position;
                    if (candidate < shortest)
                    {
                        shortest = candidate;
                    }
                    continue;
                }

                // Moved past the next queued position: done minimizing the
                // current match-length, so score it if it is within the slop.
                if (shortest <= slop)
                {
                    total += docScorer.ComputeSlopFactor(shortest); // score match
                    numMatches++;
                }

                // Put the advanced entry back and resume from whatever the queue
                // hands out next.
                pq.Add(current);
                current  = pq.Pop();
                nextPos  = pq.Top.position;
                shortest = end - current.position;
            }

            // Score the match that was still being minimized when the loop ended.
            if (shortest <= slop)
            {
                total += docScorer.ComputeSlopFactor(shortest); // score match
                numMatches++;
            }

            return total;
        }
Example #2
0
 /// <summary>
 /// Accumulates the frequency and match count for the document the spans are
 /// currently positioned on, consuming every span that belongs to that document.
 /// </summary>
 /// <returns>
 /// <c>false</c> (leaving all state untouched) when <c>m_more</c> is already
 /// <c>false</c>; otherwise <c>true</c> after <c>m_doc</c>, <c>m_freq</c>,
 /// <c>m_numMatches</c> and <c>m_more</c> have been updated.
 /// </returns>
 protected virtual bool SetFreqCurrentDoc()
 {
     if (!m_more)
     {
         return false;
     }

     m_doc        = m_spans.Doc;
     m_freq       = 0.0f;
     m_numMatches = 0;

     for (;;)
     {
         // Each span contributes a slop factor derived from its width (End - Start).
         int spanWidth = m_spans.End - m_spans.Start;
         m_freq += m_docScorer.ComputeSlopFactor(spanWidth);
         m_numMatches++;

         // Stop once the spans are exhausted or have moved on to another document.
         m_more = m_spans.Next();
         if (!m_more || m_doc != m_spans.Doc)
         {
             break;
         }
     }

     return true;
 }