/// <summary>
/// Computes the sloppy-phrase frequency of the candidate document by summing a
/// slop factor for every slop-valid combination of positions (a "match") that is
/// encountered while hopping through the PhrasePositions.
/// <para>
/// The contribution of a single match depends on its distance: distance 0
/// (an exact match) contributes the most, and the contribution shrinks as the
/// distance grows.
/// </para>
/// <para>
/// Example: for the query "a b"~2, the document "x a b a y" can be scored twice:
/// once for "a b" (distance=0) and once for "b a" (distance=2).
/// </para>
/// <para>
/// Not every valid combination is necessarily encountered. For efficiency the
/// least PhrasePosition is always the one that is propagated, which lets the
/// traversal rely on the priority queue and move forward faster. As a result,
/// the document "a b c b a" would score differently for the queries "a b c"~4
/// and "c b a"~4, although the two are really equivalent. Similarly, for the
/// document "a b c b a f g", the query "c b"~2 would get the same score as
/// "g f"~2, although "c b"~2 could be matched twice. This may be fixed in the
/// future (currently it is not, for performance reasons).
/// </para>
/// </summary>
/// <returns>
/// The accumulated slop-factor score, or <c>0.0f</c> when
/// <c>InitPhrasePositions()</c> reports failure. As a side effect,
/// <c>NumMatches</c> is reset and then incremented once per scored match.
/// </returns>
private float PhraseFreq()
{
    if (!InitPhrasePositions())
    {
        return 0.0f;
    }

    float total = 0.0f;
    NumMatches = 0;

    // Take the queue's current top element and remember the position of the new top.
    PhrasePositions current = Pq.Pop();
    int bestLength = End - current.Position;
    int nextPosition = Pq.Top().Position;

    while (AdvancePP(current))
    {
        if (HasRpts && !AdvanceRpts(current))
        {
            break; // pps exhausted
        }

        if (current.Position <= nextPosition)
        {
            // Still at or before the next queued position: keep the shortest
            // match length seen for the current match.
            int candidateLength = End - current.Position;
            if (candidateLength < bestLength)
            {
                bestLength = candidateLength;
            }
            continue;
        }

        // Moved past the next queued position: done minimizing the current
        // match length, so score it if it fits within the slop.
        if (bestLength <= Slop)
        {
            total += DocScorer.ComputeSlopFactor(bestLength); // score match
            NumMatches++;
        }

        // Re-queue the advanced element and continue from the new top.
        Pq.Add(current);
        current = Pq.Pop();
        nextPosition = Pq.Top().Position;
        bestLength = End - current.Position;
    }

    // Score the match that was still pending when the loop ended.
    if (bestLength <= Slop)
    {
        total += DocScorer.ComputeSlopFactor(bestLength); // score match
        NumMatches++;
    }

    return total;
}
/// <summary>
/// Drains the priority queue into a singly linked list: elements are popped one
/// at a time until <c>pq.Top()</c> returns <c>null</c>, and each popped element is
/// appended to the tail of the list. Afterwards <c>first</c> refers to the first
/// element popped and <c>last</c> to the final one; both are <c>null</c> when the
/// queue was already empty.
/// </summary>
protected internal void PqToList()
{
    // Start from an empty list.
    first = null;
    last = null;

    while (pq.Top() != null)
    {
        PhrasePositions node = pq.Pop();

        if (last == null)
        {
            // First element popped becomes the head of the list.
            first = node;
        }
        else
        {
            // Otherwise link it after the current tail.
            last.next = node;
        }

        last = node;
        node.next = null; // the new tail has no successor
    }
}