/// <summary>
/// Counts how often the exact phrase occurs in the current document.
/// <br/>Every PhrasePositions in the list is moved to its first position and pushed
/// into <c>pq</c>; the list is then rebuilt from the queue so it is sorted.
/// <br/>A match is counted each time <c>first</c> is no longer behind <c>last</c>,
/// i.e. when all PhrasePositions have exactly the same position.
/// </summary>
/// <returns> the number of matches found, implicitly converted to float </returns>
protected internal override float PhraseFreq()
{
    // Sort the list by routing every entry through the priority queue.
    pq.Clear();
    PhrasePositions cursor = first;
    while (cursor != null)
    {
        cursor.FirstPosition();
        pq.Put(cursor); // build pq from list
        cursor = cursor.next;
    }
    PqToList(); // rebuild list from pq

    int matches = 0;
    while (true)
    {
        // Find a position where all terms line up.
        while (first.position < last.position)
        {
            // Scan forward in first until it catches up with last.
            do
            {
                if (!first.NextPosition())
                {
                    // first has no more positions: no further match is possible.
                    return matches;
                }
            }
            while (first.position < last.position);

            // NOTE(review): presumably moves the head entry to the tail of the list -- confirm in FirstToLast.
            FirstToLast();
        }

        matches++; // all equal: a match

        if (!last.NextPosition())
        {
            break;
        }
    }

    return matches;
}
/// <summary> Init PhrasePositions in place.
/// There is a one-time initialization for this scorer:
/// <br/>- Put in repeats[] each pp that has another pp with the same position in the doc.
/// <br/>- Also mark each such pp by pp.repeats = true.
/// <br/>Later TermPositionsDiffer(pp) can consult repeats[], which keeps that check efficient.
/// In particular, queries with no repetitions are scored with no overhead from this computation.
/// <br/>- Example 1 - query with no repetitions: "ho my"~2
/// <br/>- Example 2 - query with repetitions: "ho my my"~2
/// <br/>- Example 3 - query with repetitions: "my ho my"~2
/// <br/>Per-document init for queries with repeats also advances some repeating pp's
/// so that they start on differing term positions, to avoid false phrase detection.
/// </summary>
/// <returns> end (max position), or -1 if any term ran out (i.e. done)
/// </returns>
/// <exception cref="System.IO.IOException">Carried over from the original documentation;
/// presumably propagated from the position readers -- confirm.</exception>
private int InitPhrasePositions()
{
    int maxPosition = 0;

    // No repeats at all (most common case is also the simplest one):
    // position each pp, track the max position and fill the queue in one pass.
    if (checkedRepeats && repeats == null)
    {
        pq.Clear();
        PhrasePositions entry = first;
        while (entry != null)
        {
            entry.FirstPosition();
            if (maxPosition < entry.position)
            {
                maxPosition = entry.position;
            }
            pq.Put(entry); // build pq from list
            entry = entry.next;
        }
        return maxPosition;
    }

    // Position the pp's.
    PhrasePositions toPosition = first;
    while (toPosition != null)
    {
        toPosition.FirstPosition();
        toPosition = toPosition.next;
    }

    // One-time initialization for this scorer: detect repeating pp's.
    if (!checkedRepeats)
    {
        checkedRepeats = true;

        // Used as a set of repeating pp's; stays null while none has been found.
        System.Collections.Hashtable repeating = null;
        for (PhrasePositions outer = first; outer != null; outer = outer.next)
        {
            int outerTermPos = outer.position + outer.offset;
            for (PhrasePositions inner = outer.next; inner != null; inner = inner.next)
            {
                if (inner.position + inner.offset != outerTermPos)
                {
                    continue;
                }
                if (repeating == null)
                {
                    repeating = new System.Collections.Hashtable();
                }
                outer.repeats = true;
                inner.repeats = true;
                repeating[outer] = null;
                repeating[inner] = null;
            }
        }

        if (repeating != null)
        {
            System.Collections.ArrayList repeatingKeys = new System.Collections.ArrayList(repeating.Keys);
            repeats = (PhrasePositions[]) repeatingKeys.ToArray(typeof(PhrasePositions));
        }
    }

    // With repeats, some repeating pp's must be advanced so they all start with differing tp's.
    if (repeats != null)
    {
        for (int idx = 0; idx < repeats.Length; ++idx)
        {
            PhrasePositions repeated = repeats[idx];
            for (PhrasePositions clash = TermPositionsDiffer(repeated);
                 clash != null;
                 clash = TermPositionsDiffer(repeated))
            {
                // TermPositionsDiffer returned a pp to advance; keep going until it returns null.
                if (!clash.NextPosition())
                {
                    return -1; // ran out of a term -- done
                }
            }
        }
    }

    // Build queue from list, tracking the largest position seen.
    pq.Clear();
    for (PhrasePositions entry = first; entry != null; entry = entry.next)
    {
        if (maxPosition < entry.position)
        {
            maxPosition = entry.position;
        }
        pq.Put(entry); // build pq from list
    }

    if (repeats != null)
    {
        // Scratch array sized to the queue; it is consumed outside this method.
        tmpPos = new PhrasePositions[pq.Size()];
    }

    return maxPosition;
}