コード例 #1
0
        /// <summary>
        /// Computes the sloppy phrase frequency of the current candidate document by
        /// accumulating a score for each slop-valid combination of term positions (a
        /// "match") encountered while advancing the PhrasePositions held in the queue.
        /// <br/>Each match contributes according to its distance:
        /// <br/> - distance=0 (an exact match) contributes the most;
        /// <br/> - the contribution shrinks as the distance grows.
        /// <br/>Example: for query "a b"~2, the document "x a b a y" can be scored twice:
        /// once for "a b" (distance=0) and once for "b a" (distance=2).
        /// <br/>Not every valid combination is necessarily visited: for efficiency only the
        /// PhrasePositions with the least position is ever advanced, which lets the
        /// traversal rely on a priority queue and move forward faster.
        /// As a consequence, document "a b c b a" scores differently for the queries
        /// "a b c"~4 and "c b a"~4, even though the two are really equivalent.
        /// Likewise, for document "a b c b a f g", query "c b"~2 gets the same score as
        /// "g f"~2, although "c b"~2 could be matched twice.
        /// This may be fixed in the future (currently not, for performance reasons).
        /// </summary>
        /// <returns>
        /// The accumulated frequency; 0 when InitPhrasePositions() reports a negative end.
        /// </returns>
        protected internal override float PhraseFreq()
        {
            int windowEnd = InitPhrasePositions();
            float total = 0.0f;
            bool exhausted = windowEnd < 0;

            while (!exhausted)
            {
                // Take the entry with the least position; the new queue top bounds
                // how far it may advance while still remaining the minimum.
                PhrasePositions current = pq.Pop();
                int windowStart = current.position;
                int nextLeast = pq.Top().position;

                bool distinct = true;
                int cursor = windowStart;
                while (cursor <= nextLeast || !distinct)
                {
                    if (distinct && cursor <= nextLeast)
                    {
                        // Tighten the window start to the latest usable position.
                        windowStart = cursor;
                    }

                    if (!current.NextPosition())
                    {
                        // One of the terms has no further positions: finish after
                        // scoring the window collected so far.
                        exhausted = true;
                        break;
                    }

                    // Only entries flagged as repeating are checked against the others.
                    PhrasePositions clash = current.repeats ? TermPositionsDiffer(current) : null;
                    distinct = clash == null;
                    if (clash != null && clash != current)
                    {
                        // Continue advancing with the entry returned by Flip.
                        current = Flip(current, clash);
                    }

                    cursor = current.position;
                }

                int matchLength = windowEnd - windowStart;
                if (matchLength <= slop)
                {
                    // The window fits within the allowed slop: count it as a match.
                    total += Similarity.SloppyFreq(matchLength);
                }

                if (windowEnd < current.position)
                {
                    windowEnd = current.position;
                }

                // Put the advanced entry back so the queue is complete again.
                pq.Add(current);
            }

            return total;
        }
コード例 #2
0
 /// <summary>
 /// Forwards the sloppy-frequency computation to <c>delegee</c> and returns
 /// its result unchanged.
 /// </summary>
 /// <param name="distance">The distance value passed through to <c>delegee</c>.</param>
 /// <returns>Whatever <c>delegee.SloppyFreq(distance)</c> returns.</returns>
 public override float SloppyFreq(int distance)
 {
     float delegatedFreq = delegee.SloppyFreq(distance);
     return delegatedFreq;
 }