Exemplo n.º 1
0
 /// <summary>
 /// Move all PPs to their first position </summary>
 private void PlaceFirstPositions()
 {
     for (PhrasePositions pp = min, prev = null; prev != max; pp = (prev = pp).next) // iterate cyclic list: done once handled max
     {
         pp.FirstPosition();
     }
 }
Exemplo n.º 2
0
        protected internal override float PhraseFreq()
        {
            // sort list with pq
            pq.Clear();
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                pp.FirstPosition();
                pq.Put(pp);         // build pq from list
            }
            PqToList();             // rebuild list from pq

            // for counting how many times the exact phrase is found in current document,
            // just count how many times all PhrasePosition's have exactly the same position.
            int freq = 0;

            do
            {
                // find position w/ all terms
                while (first.position < last.position)
                {
                    // scan forward in first
                    do
                    {
                        if (!first.NextPosition())
                        {
                            return(freq);
                        }
                    }while (first.position < last.position);
                    FirstToLast();
                }
                freq++;                 // all equal: a match
            }while (last.NextPosition());

            return(freq);
        }
Exemplo n.º 3
0
        protected internal override float PhraseFreq()
        {
            // sort list with pq
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                pp.FirstPosition();
                pq.Put(pp);         // build pq from list
            }
            PqToList();             // rebuild list from pq

            int freq = 0;

            do
            {
                // find position w/ all terms
                while (first.position < last.position)
                {
                    // scan forward in first
                    do
                    {
                        if (!first.NextPosition())
                        {
                            return((float)freq);
                        }
                    }while (first.position < last.position);
                    FirstToLast();
                }
                freq++;                 // all equal: a match
            }while (last.NextPosition());

            return((float)freq);
        }
Exemplo n.º 4
0
 /// <summary>
 /// No repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient </summary>
 private void InitSimple()
 {
     //System.err.println("initSimple: doc: "+min.doc);
     pq.Clear();
     // position pps and build queue from list
     for (PhrasePositions pp = min, prev = null; prev != max; pp = (prev = pp).next) // iterate cyclic list: done once handled max
     {
         pp.FirstPosition();
         if (pp.position > end)
         {
             end = pp.position;
         }
         pq.Add(pp);
     }
 }
Exemplo n.º 5
0
        protected internal override float PhraseFreq()
        {
            pq.Clear();
            int end = 0;

            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                pp.FirstPosition();
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // build pq from list
            }

            float freq = 0.0f;
            bool  done = false;

            do
            {
                PhrasePositions pp    = (PhrasePositions)pq.Pop();
                int             start = pp.position;
                int             next  = ((PhrasePositions)pq.Top()).position;
                for (int pos = start; pos <= next; pos = pp.position)
                {
                    start = pos;                     // advance pp to min window
                    if (!pp.NextPosition())
                    {
                        done = true;                         // ran out of a term -- done
                        break;
                    }
                }

                int matchLength = end - start;
                if (matchLength <= slop)
                {
                    freq += GetSimilarity().SloppyFreq(matchLength);                     // score match
                }
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // restore pq
            }while (!done);

            return(freq);
        }
Exemplo n.º 6
0
        /// <summary> Init PhrasePositions in place.
        /// There is a one time initialization for this scorer:
        /// <br/>- Put in repeats[] each pp that has another pp with same position in the doc.
        /// <br/>- Also mark each such pp by pp.repeats = true.
        /// <br/>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
        /// In particular, this allows to score queries with no repetitions with no overhead due to this computation.
        /// <br/>- Example 1 - query with no repetitions: "ho my"~2
        /// <br/>- Example 2 - query with repetitions: "ho my my"~2
        /// <br/>- Example 3 - query with repetitions: "my ho my"~2
        /// <br/>Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection.
        /// </summary>
        /// <returns> end (max position), or -1 if any term ran out (i.e. done)
        /// </returns>
        /// <throws>  IOException  </throws>
        private int InitPhrasePositions()
        {
            int end = 0;

            // no repeats at all (most common case is also the simplest one)
            if (checkedRepeats && repeats == null)
            {
                // build queue from list
                pq.Clear();
                for (PhrasePositions pp = first; pp != null; pp = pp.next)
                {
                    pp.FirstPosition();
                    if (pp.position > end)
                    {
                        end = pp.position;
                    }
                    pq.Put(pp);                     // build pq from list
                }
                return(end);
            }

            // position the pp's
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                pp.FirstPosition();
            }

            // one time initializatin for this scorer
            if (!checkedRepeats)
            {
                checkedRepeats = true;
                // check for repeats
                Support.Dictionary <PhrasePositions, Object> m = null;
                for (PhrasePositions pp = first; pp != null; pp = pp.next)
                {
                    int tpPos = pp.position + pp.offset;
                    for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next)
                    {
                        int tpPos2 = pp2.position + pp2.offset;
                        if (tpPos2 == tpPos)
                        {
                            if (m == null)
                            {
                                m = new Support.Dictionary <PhrasePositions, object>();
                            }
                            pp.repeats  = true;
                            pp2.repeats = true;
                            m[pp]       = null;
                            m[pp2]      = null;
                        }
                    }
                }
                if (m != null)
                {
                    repeats = new PhrasePositions[m.Keys.Count];
                    m.Keys.CopyTo(repeats, 0);
                }
            }

            // with repeats must advance some repeating pp's so they all start with differing tp's
            if (repeats != null)
            {
                for (int i = 0; i < repeats.Length; i++)
                {
                    PhrasePositions pp = repeats[i];
                    PhrasePositions pp2;
                    while ((pp2 = TermPositionsDiffer(pp)) != null)
                    {
                        if (!pp2.NextPosition())
                        {
                            // out of pps that do not differ, advance the pp with higher offset
                            return(-1);                             // ran out of a term -- done
                        }
                    }
                }
            }

            // build queue from list
            pq.Clear();
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // build pq from list
            }

            if (repeats != null)
            {
                tmpPos = new PhrasePositions[pq.Size()];
            }
            return(end);
        }
Exemplo n.º 7
0
        /// <summary> Init PhrasePositions in place.
        /// There is a one time initializatin for this scorer:
        /// <br>- Put in repeats[] each pp that has another pp with same position in the doc.
        /// <br>- Also mark each such pp by pp.repeats = true.
        /// <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
        /// In particular, this allows to score queries with no repetiotions with no overhead due to this computation.
        /// <br>- Example 1 - query with no repetitions: "ho my"~2
        /// <br>- Example 2 - query with repetitions: "ho my my"~2
        /// <br>- Example 3 - query with repetitions: "my ho my"~2
        /// <br>Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection.
        /// </summary>
        /// <returns> end (max position), or -1 if any term ran out (i.e. done)
        /// </returns>
        /// <throws>  IOException  </throws>
        private int InitPhrasePositions()
        {
            int end = 0;

            // no repeats at all (most common case is also the simplest one)
            if (checkedRepeats && repeats == null)
            {
                // build queue from list
                pq.Clear();
                for (PhrasePositions pp = first; pp != null; pp = pp.next)
                {
                    pp.FirstPosition();
                    if (pp.position > end)
                    {
                        end = pp.position;
                    }
                    pq.Put(pp);                     // build pq from list
                }
                return(end);
            }

            // position the pp's
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                pp.FirstPosition();
            }

            // one time initializatin for this scorer
            if (!checkedRepeats)
            {
                checkedRepeats = true;
                // check for repeats
                System.Collections.Hashtable m = null;
                for (PhrasePositions pp = first; pp != null; pp = pp.next)
                {
                    int tpPos = pp.position + pp.offset;
                    for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next)
                    {
                        int tpPos2 = pp2.position + pp2.offset;
                        if (tpPos2 == tpPos)
                        {
                            if (m == null)
                            {
                                m = new System.Collections.Hashtable();
                            }
                            pp.repeats  = true;
                            pp2.repeats = true;
                            m[pp]       = null;
                            m[pp2]      = null;
                        }
                    }
                }
                if (m != null)
                {
                    repeats = (PhrasePositions[])(new System.Collections.ArrayList(m.Keys).ToArray(typeof(PhrasePositions)));
                }
            }

            // with repeats must advance some repeating pp's so they all start with differing tp's
            if (repeats != null)
            {
                // must propagate higher offsets first (otherwise might miss matches).
                System.Array.Sort(repeats, new AnonymousClassComparator(this));
                // now advance them
                for (int i = 0; i < repeats.Length; i++)
                {
                    PhrasePositions pp = repeats[i];
                    while (!TermPositionsDiffer(pp))
                    {
                        if (!pp.NextPosition())
                        {
                            return(-1);                             // ran out of a term -- done
                        }
                    }
                }
            }

            // build queue from list
            pq.Clear();
            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // build pq from list
            }

            return(end);
        }