Beispiel #1
0
        /// <summary>
        /// Score a candidate doc for all slop-valid position-combinations (matches)
        /// encountered while traversing/hopping the PhrasePositions.
        /// <para/> The score contribution of a match depends on the distance:
        /// <para/> - highest score for distance=0 (exact match).
        /// <para/> - score gets lower as distance gets higher.
        /// <para/>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
        /// once for "a b" (distance=0), and once for "b a" (distance=2).
        /// <para/>Possibly not all valid combinations are encountered, because for efficiency
        /// we always propagate the least PhrasePosition. This allows to base on
        /// <see cref="Util.PriorityQueue{T}"/> and move forward faster.
        /// As result, for example, document "a b c b a"
        /// would score differently for queries "a b c"~4 and "c b a"~4, although
        /// they really are equivalent.
        /// Similarly, for doc "a b c b a f g", query "c b"~2
        /// would get same score as "g f"~2, although "c b"~2 could be matched twice.
        /// We may want to fix this in the future (currently not, for performance reasons).
        /// </summary>
        private float PhraseFreq()
        {
            if (!InitPhrasePositions())
            {
                return(0.0f);
            }
            float freq = 0.0f;

            numMatches = 0;
            PhrasePositions pp          = pq.Pop();
            int             matchLength = end - pp.position;
            int             next        = pq.Top.position;

            while (AdvancePP(pp))
            {
                if (hasRpts && !AdvanceRpts(pp))
                {
                    break;              // pps exhausted
                }
                if (pp.position > next) // done minimizing current match-length
                {
                    if (matchLength <= slop)
                    {
                        freq += docScorer.ComputeSlopFactor(matchLength); // score match
                        numMatches++;
                    }
                    pq.Add(pp);
                    pp          = pq.Pop();
                    next        = pq.Top.position;
                    matchLength = end - pp.position;
                }
                else
                {
                    int matchLength2 = end - pp.position;
                    if (matchLength2 < matchLength)
                    {
                        matchLength = matchLength2;
                    }
                }
            }
            if (matchLength <= slop)
            {
                freq += docScorer.ComputeSlopFactor(matchLength); // score match
                numMatches++;
            }
            return(freq);
        }
Beispiel #2
0
 protected internal void  PqToList()
 {
     last = first = null;
     while (pq.Top() != null)
     {
         PhrasePositions pp = pq.Pop();
         if (last != null)
         {
             // add next to end of list
             last.next = pp;
         }
         else
         {
             first = pp;
         }
         last    = pp;
         pp.next = null;
     }
 }