Ejemplo n.º 1
0
 /// <summary>
 /// Calls <c>FirstPosition()</c> on every entry of the cyclic list, starting at
 /// <c>min</c> and stopping once <c>max</c> has been handled.
 /// </summary>
 private void PlaceFirstPositions()
 {
     PhrasePositions handled = null;
     PhrasePositions current = min;
     while (handled != max) // the ring is fully visited once max has been processed
     {
         current.FirstPosition();
         handled = current;
         current = current.next;
     }
 }
Ejemplo n.º 2
0
        private float freq; // phrase frequency in current doc as computed by phraseFreq().

        /// <summary>
        /// Stores the norms and weight data, chains one <c>PhrasePositions</c> per entry of
        /// <paramref name="tps"/> into the linked list (<c>first</c> .. <c>last</c>) and
        /// allocates an empty queue sized to the number of terms.
        /// </summary>
        internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms) : base(similarity)
        {
            this.norms = norms;
            this.weight = weight;
            this.value_Renamed = weight.Value;

            // Turn tps into a linked list of phrase positions.
            // A phrase position is the term position shifted by the term's offset inside
            // the phrase (pp.pos = tp.pos - offset), so an exact phrase match is simply
            // the state where every PhrasePositions reports the same position.
            int termCount = tps.Length;
            for (int termIndex = 0; termIndex < termCount; termIndex++)
            {
                PhrasePositions node = new PhrasePositions(tps[termIndex], offsets[termIndex]);
                if (last == null)
                {
                    // very first node becomes the head
                    first = node;
                }
                else
                {
                    // append behind the current tail
                    last.next = node;
                }
                last = node;
            }

            // the queue starts out empty
            pq = new PhraseQueue(tps.Length);
            first.doc = -1;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a sloppy phrase scorer: stores the scoring parameters, allocates the queue and
 /// links one <c>PhrasePositions</c> per posting into a cyclic list (<c>min</c> .. <c>max</c>).
 /// </summary>
 /// <param name="weight">Passed through to the base constructor.</param>
 /// <param name="postings">One entry per phrase slot; must be non-null and contain at least one entry.</param>
 /// <param name="slop">Slop value stored on the scorer.</param>
 /// <param name="docScorer">Scorer stored on the instance.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="postings"/> is <c>null</c>.</exception>
 /// <exception cref="System.ArgumentException"><paramref name="postings"/> is empty.</exception>
 internal SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, int slop, Similarity.SimScorer docScorer)
     : base(weight)
 {
     // Fail fast with a descriptive exception. Previously a null array surfaced as a
     // NullReferenceException and an empty one as an IndexOutOfRangeException, because
     // postings.Length and postings[0] were read before any validation took place.
     if (postings == null)
     {
         throw new System.ArgumentNullException(nameof(postings));
     }
     if (postings.Length == 0)
     {
         throw new System.ArgumentException("postings must contain at least one entry", nameof(postings));
     }

     this.docScorer   = docScorer;
     this.slop        = slop;
     this.numPostings = postings.Length;
     pq = new PhraseQueue(postings.Length);
     // cost is taken from the first posting (original note: "min(cost)")
     cost = postings[0].postings.GetCost();

     // convert tps to a list of phrase positions.
     // note: phrase-position differs from term-position in that its position
     // reflects the phrase offset: pp.pos = tp.pos - offset.
     // this allows to easily identify a matching (exact) phrase
     // when all PhrasePositions have exactly the same position.
     min     = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
     max     = min;
     max.doc = -1;
     for (int i = 1; i < postings.Length; i++)
     {
         PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
         max.next = pp;
         max      = pp;
         max.doc  = -1;
     }
     max.next = min; // make it cyclic for easier manipulation
 }
		private float freq; // phrase frequency in current doc as computed by phraseFreq().
		
		/// <summary>
		/// Stores the norms and weight data, chains one <c>PhrasePositions</c> per entry of
		/// <paramref name="tps"/> into the linked list (<c>first</c> .. <c>last</c>) and
		/// allocates an empty queue sized to the number of terms.
		/// </summary>
		internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(similarity)
		{
			this.norms = norms;
			this.weight = weight;
			this.value_Renamed = weight.Value;

			// Turn tps into a linked list of phrase positions.
			// A phrase position is the term position shifted by the term's offset inside
			// the phrase (pp.pos = tp.pos - offset), so an exact phrase match is simply
			// the state where every PhrasePositions reports the same position.
			int termCount = tps.Length;
			for (int termIndex = 0; termIndex < termCount; termIndex++)
			{
				PhrasePositions node = new PhrasePositions(tps[termIndex], offsets[termIndex]);
				if (last == null)
				{
					// very first node becomes the head
					first = node;
				}
				else
				{
					// append behind the current tail
					last.next = node;
				}
				last = node;
			}

			// the queue starts out empty
			pq = new PhraseQueue(tps.Length);
			first.doc = -1;
		}
Ejemplo n.º 5
0
        /// <summary>
        /// Counts the matches in the current document: the list is re-ordered through the
        /// queue, then the head is advanced until it is no longer behind the tail; each time
        /// that state is reached one match is counted.
        /// </summary>
        /// <returns>The number of matches found, converted to <c>float</c>.</returns>
        protected internal override float PhraseFreq()
        {
            // Place every list entry on its first position and push it into the queue,
            // then rebuild the list from the queue.
            PhrasePositions node = first;
            while (node != null)
            {
                node.FirstPosition();
                pq.Put(node);
                node = node.next;
            }
            PqToList();

            int matches = 0;

            do
            {
                // look for a position where the head has caught up with the tail
                while (first.position < last.position)
                {
                    // move the head forward until it reaches (or passes) the tail
                    do
                    {
                        if (!first.NextPosition())
                        {
                            // the head ran out of positions: no further match is possible
                            return matches;
                        }
                    }
                    while (first.position < last.position);
                    FirstToLast();
                }
                matches++; // head and tail agree: one match
            }
            while (last.NextPosition());

            return matches;
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Rotates the linked list by one: the current head is re-linked behind the tail and
 /// becomes the new tail, and the old second entry becomes the new head.
 /// </summary>
 protected internal void  FirstToLast()
 {
     PhrasePositions head = first;
     last.next = head;   // hook the head behind the tail
     first     = head.next; // read only after linking, so a one-element list keeps its head
     head.next = null;   // the moved node now terminates the list
     last      = head;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Counts how many times the exact phrase occurs in the current document, i.e. how
        /// many times all <c>PhrasePositions</c> report exactly the same position.
        /// </summary>
        /// <returns>The number of matches found, converted to <c>float</c>.</returns>
        protected internal override float PhraseFreq()
        {
            // Re-order the list through the queue: empty it, place every list entry on its
            // first position and push it, then rebuild the list from the queue.
            pq.Clear();
            PhrasePositions node = first;
            while (node != null)
            {
                node.FirstPosition();
                pq.Put(node);
                node = node.next;
            }
            PqToList();

            int matches = 0;

            do
            {
                // look for a position where the head has caught up with the tail
                while (first.position < last.position)
                {
                    // move the head forward until it reaches (or passes) the tail
                    do
                    {
                        if (!first.NextPosition())
                        {
                            // the head ran out of positions: no further match is possible
                            return matches;
                        }
                    }
                    while (first.position < last.position);
                    FirstToLast();
                }
                matches++; // all positions agree: one match
            }
            while (last.NextPosition());

            return matches;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Picks the lesser of two pps, comparing by position first and by offset when the
 /// positions are equal; <paramref name="pp2"/> is returned when neither is lesser.
 /// </summary>
 private static PhrasePositions Lesser(PhrasePositions pp, PhrasePositions pp2) // LUCENENET: CA1822: Mark members as static
 {
     bool firstIsLesser = pp.position < pp2.position
         || (pp.position == pp2.position && pp.offset < pp2.offset);
     return firstIsLesser ? pp : pp2;
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Returns the lesser of two pps, looking only at position and, on equal positions,
 /// at offset. <paramref name="pp2"/> is returned when neither is lesser.
 /// </summary>
 private PhrasePositions Lesser(PhrasePositions pp, PhrasePositions pp2)
 {
     if (pp.position != pp2.position)
     {
         // positions decide on their own
         return pp.position < pp2.position ? pp : pp2;
     }
     // same position: the offset breaks the tie
     return pp.offset < pp2.offset ? pp : pp2;
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Re-orders the linked list: empties the queue, adds every list entry to it and then
 /// rebuilds the list through <c>PqToList()</c>.
 /// </summary>
 private void  Sort()
 {
     pq.Clear();
     PhrasePositions cursor = first;
     while (cursor != null)
     {
         pq.Add(cursor);
         cursor = cursor.next;
     }
     PqToList();
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Calls <c>Next()</c> on the list entries in order, stopping at the first one that
 /// returns <c>false</c>; if <c>more</c> is still set afterwards, the list is sorted.
 /// </summary>
 private void  Init()
 {
     PhrasePositions cursor = first;
     while (more && cursor != null)
     {
         more   = cursor.Next();
         cursor = cursor.next;
     }
     if (!more)
     {
         return;
     }
     Sort();
 }
Ejemplo n.º 12
0
        //  private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
        //    //if (min.doc != ?) return;
        //    ps.println();
        //    ps.println("---- "+title);
        //    ps.println("EXT: "+ext);
        //    PhrasePositions[] t = new PhrasePositions[pq.size()];
        //    if (pq.size()>0) {
        //      t[0] = pq.pop();
        //      ps.println("  " + 0 + "  " + t[0]);
        //      for (int i=1; i<t.length; i++) {
        //        t[i] = pq.pop();
        //        assert t[i-1].position <= t[i].position;
        //        ps.println("  " + i + "  " + t[i]);
        //      }
        //      // add them back
        //      for (int i=t.length-1; i>=0; i--) {
        //        pq.add(t[i]);
        //      }
        //    }
        //  }

        /// <summary>
        /// Skips <c>min</c> to <paramref name="target"/>. On success both <c>min</c> and
        /// <c>max</c> move one step along the cyclic list; on failure <c>max.doc</c> is set
        /// to <c>NO_MORE_DOCS</c>.
        /// </summary>
        /// <returns>The result of <c>min.SkipTo(target)</c>.</returns>
        private bool AdvanceMin(int target)
        {
            bool advanced = min.SkipTo(target);
            if (advanced)
            {
                // rotate the ring by one step
                min = min.next;
                max = max.next;
            }
            else
            {
                max.doc = NO_MORE_DOCS; // for further calls to docID()
            }
            return advanced;
        }
Ejemplo n.º 13
0
        //  private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
        //    //if (min.doc != ?) return;
        //    ps.println();
        //    ps.println("---- "+title);
        //    ps.println("EXT: "+ext);
        //    PhrasePositions[] t = new PhrasePositions[pq.size()];
        //    if (pq.size()>0) {
        //      t[0] = pq.pop();
        //      ps.println("  " + 0 + "  " + t[0]);
        //      for (int i=1; i<t.length; i++) {
        //        t[i] = pq.pop();
        //        assert t[i-1].position <= t[i].position;
        //        ps.println("  " + i + "  " + t[i]);
        //      }
        //      // add them back
        //      for (int i=t.length-1; i>=0; i--) {
        //        pq.add(t[i]);
        //      }
        //    }
        //  }

        /// <summary>
        /// Skips <c>Min</c> to <paramref name="target"/>. On success both <c>Min</c> and
        /// <c>Max</c> move one step along the cyclic list; on failure <c>Max.Doc</c> is set
        /// to <c>NO_MORE_DOCS</c>.
        /// </summary>
        /// <returns>The result of <c>Min.SkipTo(target)</c>.</returns>
        private bool AdvanceMin(int target)
        {
            bool advanced = Min.SkipTo(target);
            if (advanced)
            {
                // rotate the ring by one step
                Min = Min.next;
                Max = Max.next;
            }
            else
            {
                Max.Doc = NO_MORE_DOCS; // for further calls to docID()
            }
            return advanced;
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Fills the queue (all pps are already placed): clears it, then adds every entry of the
 /// cyclic list while raising <c>End</c> to the largest position encountered.
 /// </summary>
 private void FillQueue()
 {
     Pq.Clear();
     PhrasePositions handled = null;
     PhrasePositions current = Min;
     while (handled != Max) // the ring is fully visited once Max has been processed
     {
         if (current.Position > End)
         {
             End = current.Position;
         }
         Pq.Add(current);
         handled = current;
         current = current.next;
     }
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Moves <paramref name="pp"/> to its next position and raises <c>end</c> if the new
 /// position lies beyond it.
 /// </summary>
 /// <returns><c>false</c> if <paramref name="pp"/> is exhausted, <c>true</c> otherwise.</returns>
 private bool AdvancePP(PhrasePositions pp)
 {
     bool advanced = pp.NextPosition();
     if (advanced && pp.position > end)
     {
         end = pp.position;
     }
     return advanced;
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Fills the queue (all pps are already placed): clears it, then adds every entry of the
 /// cyclic list while raising <c>end</c> to the largest position encountered.
 /// </summary>
 private void FillQueue()
 {
     pq.Clear();
     PhrasePositions handled = null;
     PhrasePositions current = min;
     while (handled != max) // the ring is fully visited once max has been processed
     {
         if (current.position > end)
         {
             end = current.position;
         }
         pq.Add(current);
         handled = current;
         current = current.next;
     }
 }
Ejemplo n.º 17
0
 /// <summary>
 /// Skips the list entries to <paramref name="target"/> in order, stopping at the first
 /// one that cannot skip; re-sorts the list if all succeeded, then delegates to <c>DoNext()</c>.
 /// </summary>
 /// <returns>The result of <c>DoNext()</c>.</returns>
 public override bool SkipTo(int target)
 {
     PhrasePositions cursor = first;
     while (more && cursor != null)
     {
         more   = cursor.SkipTo(target);
         cursor = cursor.next;
     }
     if (more)
     {
         // the entries moved, so the list order has to be rebuilt
         Sort();
     }
     return DoNext();
 }
Ejemplo n.º 18
0
        /// <summary>
        /// Orders two <c>PhrasePositions</c> by <c>doc</c>, and by <c>position</c> when both
        /// are on the same doc.
        /// </summary>
        /// <returns><c>true</c> if <paramref name="o1"/> sorts strictly before <paramref name="o2"/>.</returns>
        public override bool LessThan(System.Object o1, System.Object o2)
        {
            PhrasePositions left  = (PhrasePositions)o1;
            PhrasePositions right = (PhrasePositions)o2;

            return left.doc == right.doc
                ? left.position < right.position
                : left.doc < right.doc;
        }
Ejemplo n.º 19
0
 /// <summary>
 /// No repeats: the simplest and most common case, so this code is kept deliberately
 /// small and cheap. Places every pp on its first position, raises <c>end</c> to the
 /// largest position seen and rebuilds the queue from the cyclic list.
 /// </summary>
 private void InitSimple()
 {
     pq.Clear();
     PhrasePositions handled = null;
     PhrasePositions current = min;
     while (handled != max) // the ring is fully visited once max has been processed
     {
         current.FirstPosition();
         if (current.position > end)
         {
             end = current.position;
         }
         pq.Add(current);
         handled = current;
         current = current.next;
     }
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Looks through the repeat group of <paramref name="pp"/> for another pp with the
        /// same <c>TpPos</c> value.
        /// </summary>
        /// <returns>The <c>rptInd</c> of the first colliding pp, or -1 if there is none.</returns>
        private int Collide(PhrasePositions pp)
        {
            int target = TpPos(pp);

            foreach (PhrasePositions candidate in rptGroups[pp.rptGroup])
            {
                if (candidate != pp && TpPos(candidate) == target)
                {
                    return candidate.rptInd;
                }
            }
            return -1;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Computes the sloppy phrase frequency for the current document. Each round pops a
        /// phrase position from the queue, advances it while it is not beyond the position of
        /// the queue's new top, and adds <c>SloppyFreq(matchLength)</c> when the resulting
        /// window (<c>end - start</c>) does not exceed <c>slop</c>.
        /// </summary>
        /// <returns>The accumulated sloppy frequency.</returns>
        protected internal override float PhraseFreq()
        {
            pq.Clear();
            int end = 0; // largest position seen so far

            for (PhrasePositions pp = first; pp != null; pp = pp.next)
            {
                pp.FirstPosition();
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // build pq from list
            }

            float freq = 0.0f;
            bool  done = false;

            do
            {
                PhrasePositions pp    = (PhrasePositions)pq.Pop();
                int             start = pp.position;
                int             next  = ((PhrasePositions)pq.Top()).position;
                // keep advancing pp while it has not passed the position of the queue's top
                for (int pos = start; pos <= next; pos = pp.position)
                {
                    start = pos;                     // advance pp to min window
                    if (!pp.NextPosition())
                    {
                        done = true;                         // ran out of a term -- done
                        break;
                    }
                }

                // the window is still scored in the round that sets done
                int matchLength = end - start;
                if (matchLength <= slop)
                {
                    freq += GetSimilarity().SloppyFreq(matchLength);                     // score match
                }
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // restore pq
            }while (!done);

            return(freq);
        }
Ejemplo n.º 22
0
        /// <summary> Score a candidate doc for all slop-valid position-combinations (matches)
        /// encountered while traversing/hopping the PhrasePositions.
        /// <br/> The score contribution of a match depends on the distance:
        /// <br/> - highest score for distance=0 (exact match).
        /// <br/> - score gets lower as distance gets higher.
        /// <br/>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
        /// once for "a b" (distance=0), and once for "b a" (distance=2).
        /// <br/>Possibly not all valid combinations are encountered, because for efficiency
        /// we always propagate the least PhrasePosition. This allows to base on
        /// PriorityQueue and move forward faster.
        /// As result, for example, document "a b c b a"
        /// would score differently for queries "a b c"~4 and "c b a"~4, although
        /// they really are equivalent.
        /// Similarly, for doc "a b c b a f g", query "c b"~2
        /// would get same score as "g f"~2, although "c b"~2 could be matched twice.
        /// We may want to fix this in the future (currently not, for performance reasons).
        /// </summary>
        /// <returns> The accumulated <c>SloppyFreq</c> values of all scored matches; 0 when
        /// <c>InitPhrasePositions()</c> returns a negative value.
        /// </returns>
        protected internal override float PhraseFreq()
        {
            int end = InitPhrasePositions();

            float freq = 0.0f;
            bool  done = (end < 0); // a negative end means there is nothing to score

            while (!done)
            {
                PhrasePositions pp    = (PhrasePositions)pq.Pop();
                int             start = pp.position;
                int             next  = ((PhrasePositions)pq.Top()).position;

                // tpsDiffer stays true while no repeating pp shares a term position with pp;
                // the loop also keeps running while such a conflict is unresolved.
                bool tpsDiffer = true;
                for (int pos = start; pos <= next || !tpsDiffer; pos = pp.position)
                {
                    if (pos <= next && tpsDiffer)
                    {
                        start = pos;                         // advance pp to min window
                    }
                    if (!pp.NextPosition())
                    {
                        done = true;                         // ran out of a term -- done
                        break;
                    }
                    PhrasePositions pp2 = null;
                    // only repeating pps are checked against the other repeats
                    tpsDiffer = !pp.repeats || (pp2 = TermPositionsDiffer(pp)) == null;
                    if (pp2 != null && pp2 != pp)
                    {
                        pp = Flip(pp, pp2);                         // flip pp to pp2
                    }
                }

                int matchLength = end - start;
                if (matchLength <= slop)
                {
                    freq += GetSimilarity().SloppyFreq(matchLength);                     // score match
                }
                if (pp.position > end)
                {
                    end = pp.position;
                }
                pq.Put(pp);                 // restore pq
            }

            return(freq);
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Score a candidate doc for all slop-valid position-combinations (matches)
        /// encountered while traversing/hopping the PhrasePositions.
        /// <para/> The score contribution of a match depends on the distance:
        /// <para/> - highest score for distance=0 (exact match).
        /// <para/> - score gets lower as distance gets higher.
        /// <para/>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
        /// once for "a b" (distance=0), and once for "b a" (distance=2).
        /// <para/>Possibly not all valid combinations are encountered, because for efficiency
        /// we always propagate the least PhrasePosition. This allows to base on
        /// <see cref="Util.PriorityQueue{T}"/> and move forward faster.
        /// As result, for example, document "a b c b a"
        /// would score differently for queries "a b c"~4 and "c b a"~4, although
        /// they really are equivalent.
        /// Similarly, for doc "a b c b a f g", query "c b"~2
        /// would get same score as "g f"~2, although "c b"~2 could be matched twice.
        /// We may want to fix this in the future (currently not, for performance reasons).
        /// </summary>
        /// <returns>
        /// The sum of <c>ComputeSlopFactor(matchLength)</c> over all scored matches, or
        /// <c>0.0f</c> when <c>InitPhrasePositions()</c> returns <c>false</c>. As a side
        /// effect <c>numMatches</c> is reset and then incremented once per scored match.
        /// </returns>
        private float PhraseFreq()
        {
            if (!InitPhrasePositions())
            {
                return(0.0f);
            }
            float freq = 0.0f;

            numMatches = 0;
            PhrasePositions pp          = pq.Pop();
            int             matchLength = end - pp.position;
            int             next        = pq.Top.position;

            while (AdvancePP(pp))
            {
                if (hasRpts && !AdvanceRpts(pp))
                {
                    break;              // pps exhausted
                }
                if (pp.position > next) // done minimizing current match-length
                {
                    if (matchLength <= slop)
                    {
                        freq += docScorer.ComputeSlopFactor(matchLength); // score match
                        numMatches++;
                    }
                    // put pp back and continue with whatever the queue pops next
                    pq.Add(pp);
                    pp          = pq.Pop();
                    next        = pq.Top.position;
                    matchLength = end - pp.position;
                }
                else
                {
                    // pp is still not beyond next: keep the smaller match length
                    int matchLength2 = end - pp.position;
                    if (matchLength2 < matchLength)
                    {
                        matchLength = matchLength2;
                    }
                }
            }
            // the last pending match length is evaluated after the loop ends
            if (matchLength <= slop)
            {
                freq += docScorer.ComputeSlopFactor(matchLength); // score match
                numMatches++;
            }
            return(freq);
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Skips the list entries to <paramref name="target"/> in order, stopping at the first
 /// one that cannot skip; re-sorts the list if all succeeded and then runs <c>DoNext()</c>.
 /// </summary>
 /// <returns>
 /// <c>first.doc</c>, which is set to <c>NO_MORE_DOCS</c> when <c>DoNext()</c> returns <c>false</c>.
 /// </returns>
 public override int Advance(int target)
 {
     firstTime = false;

     PhrasePositions cursor = first;
     while (more && cursor != null)
     {
         more   = cursor.SkipTo(target);
         cursor = cursor.next;
     }
     if (more)
     {
         // the entries moved, so the list order has to be rebuilt
         Sort();
     }

     bool found = DoNext();
     if (!found)
     {
         first.doc = NO_MORE_DOCS;
     }
     return first.doc;
 }
Ejemplo n.º 25
0
        /// <summary>
        /// pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
        /// of the two colliding pps. Note that there can only be one collision, as by the initialization
        /// there were no collisions before pp was advanced.
        /// </summary>
        /// <param name="pp">The phrase position that was just advanced.</param>
        /// <returns>
        /// <c>true</c> if <paramref name="pp"/> is not a repeater or all collisions were resolved;
        /// <c>false</c> if a pp was exhausted while resolving a collision.
        /// </returns>
        private bool AdvanceRpts(PhrasePositions pp)
        {
            if (pp.rptGroup < 0)
            {
                return(true); // not a repeater
            }
            PhrasePositions[] rg   = rptGroups[pp.rptGroup];
            FixedBitSet       bits = new FixedBitSet(rg.Length); // for re-queuing after collisions are resolved
            int k0 = pp.rptInd; // index of the pp this call started with
            int k;

            while ((k = Collide(pp)) >= 0)
            {
                pp = Lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
                if (!AdvancePP(pp))
                {
                    return(false); // exhausted
                }
                if (k != k0)       // careful: mark only those currently in the queue
                {
                    bits = FixedBitSet.EnsureCapacity(bits, k);
                    bits.Set(k); // mark that pp2 need to be re-queued
                }
            }
            // collisions resolved, now re-queue
            // empty (partially) the queue until seeing all pps advanced for resolving collisions
            int n = 0; // number of pps parked in rptStack
            // TODO would be good if we can avoid calling cardinality() in each iteration!
            int numBits = bits.Length; // largest bit we set

            while (bits.Cardinality > 0)
            {
                PhrasePositions pp2 = pq.Pop();
                rptStack[n++] = pp2;
                if (pp2.rptGroup >= 0 && pp2.rptInd < numBits && bits.Get(pp2.rptInd)) // this bit may not have been set
                {
                    bits.Clear(pp2.rptInd);
                }
            }
            // add back to queue, in reverse order of popping
            for (int i = n - 1; i >= 0; i--)
            {
                pq.Add(rptStack[i]);
            }
            return(true);
        }
Ejemplo n.º 26
0
 /// <summary>
 /// At initialization (each doc), each repetition group is sorted by (query) offset.
 /// this provides the start condition: no collisions.
 /// <para/>Case 1: no multi-term repeats
 /// <para/>
 /// It is sufficient to advance each pp in the group by one less than its group index.
 /// So lesser pp is not advanced, 2nd one advance once, 3rd one advanced twice, etc.
 /// <para/>Case 2: multi-term repeats
 /// <para/>
 /// Each pp of the group is checked with <c>Collide</c>, and the lesser of the two
 /// colliding pps is advanced until no collision remains.
 /// </summary>
 /// <returns> <c>false</c> if PPs are exhausted.  </returns>
 private bool AdvanceRepeatGroups()
 {
     foreach (PhrasePositions[] rg in rptGroups)
     {
         if (hasMultiTermRpts)
         {
             // more involved, some may not collide
             int incr; // step for i: 1 normally, 0 to re-check the same index
             for (int i = 0; i < rg.Length; i += incr)
             {
                 incr = 1;
                 PhrasePositions pp = rg[i];
                 int             k;
                 while ((k = Collide(pp)) >= 0)
                 {
                     PhrasePositions pp2 = Lesser(pp, rg[k]);
                     if (!AdvancePP(pp2)) // at initialization always advance pp with higher offset
                     {
                         return(false);   // exhausted
                     }
                     if (pp2.rptInd < i)  // should not happen?
                     {
                         // an earlier group member was advanced: stay on index i and check it again
                         incr = 0;
                         break;
                     }
                 }
             }
         }
         else
         {
             // simpler, we know exactly how much to advance
             for (int j = 1; j < rg.Length; j++)
             {
                 // the pp at group index j is advanced j times
                 for (int k = 0; k < j; k++)
                 {
                     if (!rg[j].NextPosition())
                     {
                         return(false); // PPs exhausted
                     }
                 }
             }
         }
     }
     return(true); // PPs available
 }
Ejemplo n.º 27
0
        /// <summary>
        /// Collects every pp of the cyclic list that has at least one term present in
        /// <paramref name="rptTerms"/>. Whenever such a pp has more than one term,
        /// <c>hasMultiTermRpts</c> is switched on.
        /// </summary>
        /// <returns>The repeating pps, in list order.</returns>
        private PhrasePositions[] RepeatingPPs(IDictionary <Term, int?> rptTerms)
        {
            IList <PhrasePositions> repeaters = new JCG.List <PhrasePositions>();

            PhrasePositions handled = null;
            PhrasePositions current = min;
            while (handled != max) // the ring is fully visited once max has been processed
            {
                foreach (Term term in current.terms)
                {
                    if (!rptTerms.ContainsKey(term))
                    {
                        continue;
                    }
                    // one repeating term is enough to register this pp
                    repeaters.Add(current);
                    hasMultiTermRpts |= (current.terms.Length > 1);
                    break;
                }
                handled = current;
                current = current.next;
            }
            return repeaters.ToArray();
        }
Ejemplo n.º 28
0
        // Swaps pp and pp2 with respect to the queue: entries are popped until pp2 shows up,
        // everything popped before it is inserted back, and finally pp is put into the queue.
        // Preconditions: pp != pp2, pp2 is in pq, pp is not in pq.
        // Only called when there are repeating pps.
        private PhrasePositions Flip(PhrasePositions pp, PhrasePositions pp2)
        {
            int stashed = 0;

            // park everything that sits in front of pp2
            for (PhrasePositions popped = (PhrasePositions)pq.Pop(); popped != pp2; popped = (PhrasePositions)pq.Pop())
            {
                tmpPos[stashed++] = popped;
            }
            // return the parked entries, last parked first
            while (--stashed >= 0)
            {
                pq.Insert(tmpPos[stashed]);
            }
            // pp takes the place pp2 had in the queue
            pq.Put(pp);
            return pp2;
        }
Ejemplo n.º 29
0
 /// <summary>
 /// Drains the queue into the linked list: the list is reset, then every popped entry is
 /// appended in pop order, leaving <c>first</c> on the first and <c>last</c> on the final one.
 /// </summary>
 protected internal void  PqToList()
 {
     first = null;
     last  = null;
     while (pq.Top() != null)
     {
         PhrasePositions popped = pq.Pop();
         if (last == null)
         {
             // first popped entry becomes the head
             first = popped;
         }
         else
         {
             // append behind the current tail
             last.next = popped;
         }
         last        = popped;
         popped.next = null;
     }
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Finds the terms that occur more than once across the pps of the cyclic list and
        /// gives each an ordinal, assigned in the order in which its second occurrence is seen.
        /// </summary>
        /// <returns>A dictionary from repeating term to its ordinal.</returns>
        private JCG.LinkedDictionary <Term, int?> RepeatingTerms()
        {
            JCG.LinkedDictionary <Term, int?> ordinals    = new JCG.LinkedDictionary <Term, int?>();
            Dictionary <Term, int?>           occurrences = new Dictionary <Term, int?>();

            PhrasePositions handled = null;
            PhrasePositions current = min;
            while (handled != max) // the ring is fully visited once max has been processed
            {
                foreach (Term term in current.terms)
                {
                    // a term not seen before leaves 'previous' without a value
                    occurrences.TryGetValue(term, out int? previous);
                    int? updated = previous.HasValue ? previous.Value + 1 : 1;
                    occurrences[term] = updated;
                    if (updated == 2)
                    {
                        // second sighting: the term repeats, so it receives the next ordinal
                        ordinals[term] = ordinals.Count;
                    }
                }
                handled = current;
                current = current.next;
            }
            return ordinals;
        }
Ejemplo n.º 31
0
		/// <summary>
		/// Stores the norms and weight data, chains one <c>PhrasePositions</c> per entry of
		/// <paramref name="tps"/> into the linked list (<c>first</c> .. <c>last</c>) and
		/// allocates an empty queue sized to the number of terms.
		/// </summary>
		internal PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms) : base(similarity)
		{
			this.norms = norms;
			this.weight = weight;
			this.value_Renamed = weight.GetValue();

			// turn tps into a linked list
			int termCount = tps.Length;
			for (int termIndex = 0; termIndex < termCount; termIndex++)
			{
				PhrasePositions node = new PhrasePositions(tps[termIndex], positions[termIndex]);
				if (last == null)
				{
					// very first node becomes the head
					first = node;
				}
				else
				{
					// append behind the current tail
					last.next = node;
				}
				last = node;
			}

			// the queue starts out empty
			pq = new PhraseQueue(tps.Length);
		}
Ejemplo n.º 32
0
        /// <summary> Two pp's must not share the same TermPosition; this ensures that a word
        /// occurring several times in the query is matched at distinct places in the doc.
        /// </summary>
        /// <returns> <c>null</c> if <paramref name="pp"/> differs from all repeats (i.e. valid);
        /// otherwise the higher-offset one of the first pair found not to differ.
        /// </returns>
        private PhrasePositions TermPositionsDiffer(PhrasePositions pp)
        {
            // Efficiency note: a map between repeating pp's would let each pp be compared
            // only against repeats of its own term instead of against every repeat. That
            // would complicate the code for a rather rare case, so the simple scan is kept.
            int termPos = pp.position + pp.offset;

            int index = 0;
            while (index < repeats.Length)
            {
                PhrasePositions other = repeats[index];
                index++;
                if (other != pp && other.position + other.offset == termPos)
                {
                    // same term position: hand back the one with the higher offset
                    return pp.offset > other.offset ? pp : other;
                }
            }
            return null;
        }
 /// <summary> Two pp's must not share the same TermPosition; this ensures that a word
 /// occurring several times in the query is matched at distinct places in the doc.
 /// </summary>
 /// <returns> <c>null</c> if <paramref name="pp"/> differs from all repeats (i.e. valid);
 /// otherwise the higher-offset one of the first pair found not to differ.
 /// </returns>
 private PhrasePositions TermPositionsDiffer(PhrasePositions pp)
 {
     // Efficiency note: a map between repeating pp's would let each pp be compared
     // only against repeats of its own term instead of against every repeat. That
     // would complicate the code for a rather rare case, so the simple scan is kept.
     int termPos = pp.position + pp.offset;

     int index = 0;
     while (index < repeats.Length)
     {
         PhrasePositions other = repeats[index];
         index++;
         if (other != pp && other.position + other.offset == termPos)
         {
             // same term position: hand back the one with the higher offset
             return pp.offset > other.offset ? pp : other;
         }
     }
     return null;
 }
Ejemplo n.º 34
0
		/// <summary>
		/// Rotates the linked list by one: the current head is re-linked behind the tail and
		/// becomes the new tail, and the old second entry becomes the new head.
		/// </summary>
		protected internal void  FirstToLast()
		{
			PhrasePositions head = first;
			last.next = head; // hook the head behind the tail
			first = head.next; // read only after linking, so a one-element list keeps its head
			head.next = null; // the moved node now terminates the list
			last = head;
		}
Ejemplo n.º 35
0
		/// <summary>
		/// Drains the queue into the linked list: the list is reset, then every popped entry is
		/// appended in pop order, leaving <c>first</c> on the first and <c>last</c> on the final one.
		/// </summary>
		protected internal void  PqToList()
		{
			first = null;
			last = null;
			while (pq.Top() != null)
			{
				PhrasePositions popped = (PhrasePositions) pq.Pop();
				if (last == null)
				{
					// first popped entry becomes the head
					first = popped;
				}
				else
				{
					// append behind the current tail
					last.next = popped;
				}
				last = popped;
				popped.next = null;
			}
		}
 // Swaps pp and pp2 with respect to the queue: entries are popped until pp2 shows up,
 // everything popped before it is inserted back, and finally pp is put into the queue.
 // Preconditions: pp != pp2, pp2 is in pq, pp is not in pq.
 // Only called when there are repeating pps.
 private PhrasePositions Flip(PhrasePositions pp, PhrasePositions pp2)
 {
     int stashed = 0;

     // park everything that sits in front of pp2
     for (PhrasePositions popped = (PhrasePositions) pq.Pop(); popped != pp2; popped = (PhrasePositions) pq.Pop())
     {
         tmpPos[stashed++] = popped;
     }
     // return the parked entries, last parked first
     while (--stashed >= 0)
     {
         pq.Insert(tmpPos[stashed]);
     }
     // pp takes the place pp2 had in the queue
     pq.Put(pp);
     return pp2;
 }