예제 #1
0
        /// <summary>
        /// Counts exact phrase occurrences in the current document, i.e. how many times
        /// all <c>PhrasePositions</c> entries sit on exactly the same position.
        /// </summary>
        /// <returns>The number of matches found (an <c>int</c> count returned as <c>float</c>).</returns>
        protected internal override float PhraseFreq()
        {
            // Seed the queue: move every entry to its first position and enqueue it.
            pq.Clear();
            PhrasePositions node = first;
            while (node != null)
            {
                node.FirstPosition();
                pq.Add(node);
                node = node.next;
            }

            // Rebuild the linked list from the queue contents (sorts the list via the pq).
            PqToList();

            int matches = 0;
            while (true)
            {
                // Advance the head of the list until it is no longer behind the tail.
                while (first.position < last.position)
                {
                    do
                    {
                        if (!first.NextPosition())
                        {
                            // The head term ran out of positions: report what was counted so far.
                            return matches;
                        }
                    }
                    while (first.position < last.position);

                    // NOTE(review): presumably moves the just-advanced head to the end of
                    // the list, making it the new `last` -- confirm against FirstToLast().
                    FirstToLast();
                }

                // Head is no longer behind the tail: all entries are equal, which is a match.
                matches++;

                // Step the tail forward; stop once it has no further positions.
                if (!last.NextPosition())
                {
                    return matches;
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Initializes the phrase positions in place for the current document.
        /// <para>
        /// The first call that reaches the general path also performs a one-time check for
        /// repeats: every pp whose <c>position + offset</c> equals that of another pp is
        /// flagged with <c>pp.repeats = true</c> and collected into <c>repeats[]</c>.
        /// Later, <c>TermPositionsDiffer(pp)</c> can consult <c>repeats[]</c>, which keeps that
        /// check efficient; queries without repetitions are scored without overhead from
        /// this computation.
        /// </para>
        /// <para>Example 1 - query with no repetitions: "ho my"~2</para>
        /// <para>Example 2 - query with repetitions: "ho my my"~2</para>
        /// <para>Example 3 - query with repetitions: "my ho my"~2</para>
        /// <para>
        /// When repeats exist, the per-document initialization also advances some of the
        /// repeating pp's to avoid false phrase detection.
        /// </para>
        /// </summary>
        /// <returns>
        /// end (the maximum position among the pp's), or -1 if any term ran out (i.e. done).
        /// </returns>
        /// <exception cref="System.IO.IOException">
        /// Declared by the original documentation; presumably propagated from the underlying
        /// position readers.
        /// </exception>
        private int InitPhrasePositions()
        {
            int maxPos = 0;
            PhrasePositions cursor;

            // Fast path (the most common and simplest case): repeats were already checked
            // and none exist, so position, track the maximum and enqueue in a single pass.
            if (checkedRepeats && repeats == null)
            {
                pq.Clear();
                cursor = first;
                while (cursor != null)
                {
                    cursor.FirstPosition();
                    if (maxPos < cursor.position)
                    {
                        maxPos = cursor.position;
                    }
                    pq.Add(cursor);
                    cursor = cursor.next;
                }
                return maxPos;
            }

            // General path: begin by moving every pp to its first position.
            cursor = first;
            while (cursor != null)
            {
                cursor.FirstPosition();
                cursor = cursor.next;
            }

            // One-time initialization for this scorer: detect repeating pp's.
            if (!checkedRepeats)
            {
                checkedRepeats = true;

                // Created lazily; stays null when no pair of pp's collides.
                HashMap<PhrasePositions, object> collisions = null;

                cursor = first;
                while (cursor != null)
                {
                    int target = cursor.position + cursor.offset;

                    // Compare against every later pp in the list.
                    PhrasePositions other = cursor.next;
                    while (other != null)
                    {
                        if (other.position + other.offset == target)
                        {
                            if (collisions == null)
                            {
                                collisions = new HashMap<PhrasePositions, object>();
                            }
                            cursor.repeats = true;
                            other.repeats = true;
                            // Only the keys matter; the map is used as a set.
                            collisions[cursor] = null;
                            collisions[other] = null;
                        }
                        other = other.next;
                    }

                    cursor = cursor.next;
                }

                if (collisions != null)
                {
                    repeats = collisions.Keys.ToArray();
                }
            }

            // With repeats, some repeating pp's must be advanced so that they all start
            // with differing term positions.
            if (repeats != null)
            {
                for (int idx = 0; idx < repeats.Length; idx++)
                {
                    PhrasePositions repeated = repeats[idx];

                    // TermPositionsDiffer returns the pp to advance, or null once nothing
                    // clashes; per the original note, out of the pp's that do not differ
                    // it is the one with the higher offset.
                    PhrasePositions clash = TermPositionsDiffer(repeated);
                    while (clash != null)
                    {
                        if (!clash.NextPosition())
                        {
                            // Ran out of a term -- done.
                            return -1;
                        }
                        clash = TermPositionsDiffer(repeated);
                    }
                }
            }

            // Build the queue from the list, tracking the maximum position on the way.
            pq.Clear();
            cursor = first;
            while (cursor != null)
            {
                if (maxPos < cursor.position)
                {
                    maxPos = cursor.position;
                }
                pq.Add(cursor);
                cursor = cursor.next;
            }

            // A scratch array sized to the queue is only allocated when repeats exist.
            if (repeats != null)
            {
                tmpPos = new PhrasePositions[pq.Size()];
            }

            return maxPos;
        }