예제 #1
0
 /// <summary>
 /// Decides whether the current span position is accepted by comparing it with <c>end</c>.
 /// </summary>
 /// <param name="spans">Spans positioned on the candidate match.</param>
 /// <returns>
 /// <c>NO_AND_ADVANCE</c> when the span starts at or after <c>end</c>, <c>YES</c> when it
 /// ends at or before <c>end</c>, and <c>NO</c> otherwise.
 /// </returns>
 protected internal override AcceptStatus AcceptPosition(Spans spans)
 {
     Debug.Assert(spans.Start() != spans.End(), "start equals end: " + spans.Start());

     // A span that begins at or past the limit is rejected with NO_AND_ADVANCE.
     if (spans.Start() >= end)
     {
         return AcceptStatus.NO_AND_ADVANCE;
     }

     // Otherwise the answer depends only on where the span ends.
     return spans.End() <= end ? AcceptStatus.YES : AcceptStatus.NO;
 }
예제 #2
0
 /// <summary>
 /// Records the current document of <c>spans</c> in <c>doc</c> and accumulates into
 /// <c>freq</c> the sloppy frequency of every span belonging to that document.
 /// </summary>
 /// <returns><c>false</c> when <c>more</c> was already false on entry; otherwise <c>true</c>.</returns>
 public /*protected internal*/ virtual bool SetFreqCurrentDoc()
 {
     if (more)
     {
         doc = spans.Doc();
         freq = 0.0f;

         while (true)
         {
             int length = spans.End() - spans.Start();
             freq += GetSimilarity().SloppyFreq(length);
             more = spans.Next();

             // Stop once the spans are exhausted or have moved on to another document.
             if (!more || doc != spans.Doc())
             {
                 break;
             }
         }

         return true;
     }

     return false;
 }
예제 #3
0
 /// <summary>
 /// Records the current document of <c>spans</c> in <c>doc</c> and accumulates into
 /// <c>freq</c> the sloppy frequency of each span while the spans stay on that document.
 /// </summary>
 /// <returns>
 /// <c>false</c> when <c>more</c> was already false on entry; otherwise <c>true</c> if more
 /// spans remain or a non-zero frequency was accumulated.
 /// </returns>
 protected internal virtual bool SetFreqCurrentDoc()
 {
     if (more)
     {
         doc = spans.Doc();
         freq = 0.0f;

         // The iterator clause advances the spans after each accumulated match.
         for (; more && doc == spans.Doc(); more = spans.Next())
         {
             int length = spans.End() - spans.Start();
             freq += GetSimilarity().SloppyFreq(length);
         }

         return more || (freq != 0);
     }

     return false;
 }
예제 #4
0
 /// <summary>
 /// Records the current document of <c>Spans</c> in <c>Doc</c>, then accumulates the slop
 /// factor of every span in that document into <c>Freq_Renamed</c> and counts the spans
 /// in <c>NumMatches</c>.
 /// </summary>
 /// <returns><c>false</c> when <c>More</c> was already false on entry; otherwise <c>true</c>.</returns>
 protected internal virtual bool SetFreqCurrentDoc()
 {
     if (More)
     {
         Doc = Spans.Doc();
         Freq_Renamed = 0.0f;
         NumMatches = 0;

         while (true)
         {
             int length = Spans.End() - Spans.Start();
             Freq_Renamed += DocScorer.ComputeSlopFactor(length);
             NumMatches++;
             More = Spans.Next();

             // Stop once the spans are exhausted or have moved on to another document.
             if (!More || Doc != Spans.Doc())
             {
                 break;
             }
         }

         return true;
     }

     return false;
 }
예제 #5
0
            /// <summary>
            /// Orders two <c>Spans</c> by document, then by start position, then by end position.
            /// </summary>
            /// <param name="o1">First operand; cast to <c>Spans</c>.</param>
            /// <param name="o2">Second operand; cast to <c>Spans</c>.</param>
            /// <returns><c>true</c> when <paramref name="o1"/> sorts strictly before <paramref name="o2"/>.</returns>
            public override bool LessThan(object o1, object o2)
            {
                Spans left = (Spans)o1;
                Spans right = (Spans)o2;

                // Different documents: the document number alone decides.
                if (left.Doc() != right.Doc())
                {
                    return left.Doc() < right.Doc();
                }

                // Same document, different starts: the earlier start wins.
                if (left.Start() != right.Start())
                {
                    return left.Start() < right.Start();
                }

                // Same document and start: the shorter span sorts first.
                return left.End() < right.End();
            }
예제 #6
0
        /// <summary>
        /// Exhausts <paramref name="spans"/> and verifies both the total number of spans and
        /// the number of payloads reported for each span.
        /// </summary>
        /// <param name="spans">The spans to iterate until <c>Next()</c> returns false.</param>
        /// <param name="numSpans">Expected total number of spans.</param>
        /// <param name="numPayloads">Expected payload count per span, indexed in iteration order.</param>
        private void CheckSpans(Spans spans, int numSpans, int[] numPayloads)
        {
            int cnt = 0;

            while (spans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nSpans Dump --");
                }

                if (spans.PayloadAvailable)
                {
                    var payload = spans.Payload;
                    if (VERBOSE)
                    {
                        Console.WriteLine("payloads for span:" + payload.Count);
                        foreach (var bytes in payload)
                        {
                            Console.WriteLine("doc:" + spans.Doc() + " s:" + spans.Start() + " e:" + spans.End() + " " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                        }
                    }

                    Assert.AreEqual(numPayloads[cnt], payload.Count);
                }
                else if (numPayloads.Length > 0)
                {
                    // The failure message indexes numPayloads, so it may only be built once the
                    // array is known to be non-empty. Building it unconditionally threw
                    // IndexOutOfRangeException for an empty array before the assertion could run.
                    Assert.IsFalse(numPayloads[cnt] > 0, "Expected spans:" + numPayloads[cnt] + " found: 0");
                }

                cnt++;
            }

            Assert.AreEqual(numSpans, cnt);
        }
예제 #7
0
		/// <summary>
		/// Formats <paramref name="span"/> by passing its document, start and end to the
		/// three-argument overload of <c>S</c>.
		/// </summary>
		/// <param name="span">The span whose current position is formatted.</param>
		/// <returns>The string produced by the three-argument overload.</returns>
		public virtual System.String S(Spans span)
		{
			int docId = span.Doc();
			int startPos = span.Start();
			int endPos = span.End();
			return S(docId, startPos, endPos);
		}
예제 #8
0
 /// <summary>Returns the end position reported by the wrapped <c>spans</c>.</summary>
 /// <returns>The value of <c>spans.End()</c>.</returns>
 public virtual int End()
 {
     int endPosition = spans.End();
     return endPosition;
 }
예제 #9
0
 /// <summary>
 /// Formats <paramref name="span"/> by passing its document, start and end to the
 /// three-argument overload of <c>S</c>.
 /// </summary>
 /// <param name="span">The span whose current position is formatted.</param>
 /// <returns>The string produced by the three-argument overload.</returns>
 public virtual System.String S(Spans span)
 {
     int docId = span.Doc();
     int startPos = span.Start();
     int endPos = span.End();
     return S(docId, startPos, endPos);
 }
예제 #10
0
 /// <summary>
 /// Advances <paramref name="spans"/> once and asserts that the new position matches the
 /// expected document, start and end.
 /// </summary>
 /// <param name="spans">The spans to advance.</param>
 /// <param name="doc">Expected document number.</param>
 /// <param name="start">Expected start position.</param>
 /// <param name="end">Expected end position.</param>
 private void TstNextSpans(Spans spans, int doc, int start, int end)
 {
     bool advanced = spans.Next();
     Assert.IsTrue(advanced, "next");

     int actualDoc = spans.Doc();
     Assert.AreEqual(doc, actualDoc, "doc");

     int actualStart = spans.Start();
     Assert.AreEqual(start, actualStart, "start");

     int actualEnd = spans.End();
     Assert.AreEqual(end, actualEnd, "end");
 }
예제 #11
0
 /// <summary>
 /// Tells whether two Spans positioned in the same document are in order.
 /// </summary>
 /// <param name="spans1">The span expected to come first.</param>
 /// <param name="spans2">The span expected to come second.</param>
 /// <returns>
 /// <c>true</c> iff <paramref name="spans1"/> starts before <paramref name="spans2"/>, or
 /// both start at the same position and <paramref name="spans1"/> ends first.
 /// </returns>
 internal static bool DocSpansOrdered(Spans spans1, Spans spans2)
 {
     Debug.Assert(spans1.Doc() == spans2.Doc(), "doc1 " + spans1.Doc() + " != doc2 " + spans2.Doc());

     int firstStart = spans1.Start();
     int secondStart = spans2.Start();

     // Differing starts decide the order without End() ever being invoked.
     if (firstStart != secondStart)
     {
         return firstStart < secondStart;
     }

     // Equal starts: only now are the end positions needed.
     return spans1.End() < spans2.End();
 }
예제 #12
0
        /// <summary>
        /// Exhausts <paramref name="spans"/> and verifies both the total number of spans and
        /// the number of payloads reported for each span.
        /// </summary>
        /// <param name="spans">The spans to iterate until <c>Next()</c> returns false.</param>
        /// <param name="numSpans">Expected total number of spans.</param>
        /// <param name="numPayloads">Expected payload count per span, indexed in iteration order.</param>
        private void CheckSpans(Spans spans, int numSpans, int[] numPayloads)
        {
            int cnt = 0;

            while (spans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nSpans Dump --");
                }

                if (spans.PayloadAvailable)
                {
                    var payload = spans.Payload;
                    if (VERBOSE)
                    {
                        Console.WriteLine("payloads for span:" + payload.Count);
                        foreach (var bytes in payload)
                        {
                            Console.WriteLine("doc:" + spans.Doc() + " s:" + spans.Start() + " e:" + spans.End() + " " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                        }
                    }

                    Assert.AreEqual(numPayloads[cnt], payload.Count);
                }
                else if (numPayloads.Length > 0)
                {
                    // The failure message indexes numPayloads, so it may only be built once the
                    // array is known to be non-empty. Building it unconditionally threw
                    // IndexOutOfRangeException for an empty array before the assertion could run.
                    Assert.IsFalse(numPayloads[cnt] > 0, "Expected spans:" + numPayloads[cnt] + " found: 0");
                }

                cnt++;
            }

            Assert.AreEqual(numSpans, cnt);
        }
예제 #13
0
		/// <summary>
		/// Exhausts <paramref name="spans"/> and verifies both the total number of spans and
		/// the number of payloads reported for each span.
		/// </summary>
		/// <param name="spans">The spans to iterate until <c>Next()</c> returns false.</param>
		/// <param name="numSpans">Expected total number of spans.</param>
		/// <param name="numPayloads">Expected payload count per span, indexed in iteration order.</param>
		private void  CheckSpans(Spans spans, int numSpans, int[] numPayloads)
		{
			int cnt = 0;
			
			while (spans.Next())
			{
				if (DEBUG)
				{
					System.Console.Out.WriteLine("\nSpans Dump --");
				}

				if (spans.IsPayloadAvailable())
				{
					System.Collections.Generic.ICollection<byte[]> payload = spans.GetPayload();
					if (DEBUG)
					{
						System.Console.Out.WriteLine("payloads for span:" + payload.Count);
					}

					// foreach disposes the enumerator, unlike the hand-rolled MoveNext loop.
					foreach (byte[] bytes in payload)
					{
						if (DEBUG)
						{
							System.Console.Out.WriteLine("doc:" + spans.Doc() + " s:" + spans.Start() + " e:" + spans.End() + " " + System.Text.Encoding.UTF8.GetString(bytes));
						}
					}
					
					Assert.AreEqual(numPayloads[cnt], payload.Count);
				}
				else if (numPayloads.Length > 0)
				{
					// The failure message indexes numPayloads, so it may only be built once the
					// array is known to be non-empty. Building it unconditionally threw
					// IndexOutOfRangeException for an empty array before the assertion could run.
					Assert.IsFalse(numPayloads[cnt] > 0, "Expected spans:" + numPayloads[cnt] + " found: 0");
				}

				cnt++;
			}
			
			Assert.AreEqual(numSpans, cnt);
		}
예제 #14
0
        /// <summary>
        /// The subSpans are ordered in the same doc, so there is a possible match.
        /// Compute the slop while making the match as short as possible by advancing
        /// all subSpans except the last one in reverse order.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the accumulated slop does not exceed <c>AllowedSlop</c>.
        /// </returns>
        private bool ShrinkToAfterShortestMatch()
        {
            // The last subSpans is never advanced here; its position seeds the match bounds.
            MatchStart = subSpans[subSpans.Length - 1].Start();
            MatchEnd   = subSpans[subSpans.Length - 1].End();
            var possibleMatchPayloads = new HashSet <byte[]>();

            if (subSpans[subSpans.Length - 1].PayloadAvailable)
            {
                //LUCENE TO-DO UnionWith or AddAll(Set<>, IEnumerable<>)
                possibleMatchPayloads.UnionWith(subSpans[subSpans.Length - 1].Payload);
            }

            // Payload of the most recent accepted position of the subSpans being shrunk.
            // NOTE(review): declared outside the loop and never reset, so when a later
            // prevSpans has no payload the previous list is unioned again below; this is
            // a no-op for a set, but confirm it is intended.
            IList <byte[]> possiblePayload = null;

            int matchSlop = 0;
            int lastStart = MatchStart;
            int lastEnd   = MatchEnd;

            // Walk the remaining subSpans from the second-to-last down to the first.
            for (int i = subSpans.Length - 2; i >= 0; i--)
            {
                Spans prevSpans = subSpans[i];
                if (CollectPayloads && prevSpans.PayloadAvailable)
                {
                    var payload = prevSpans.Payload;
                    possiblePayload = new List <byte[]>(payload.Count);
                    possiblePayload.AddRange(payload);
                }

                // Position of prevSpans before it is advanced; replaced below by every
                // later position that is still ordered before (lastStart, lastEnd).
                int prevStart = prevSpans.Start();
                int prevEnd   = prevSpans.End();
                while (true) // Advance prevSpans until after (lastStart, lastEnd)
                {
                    if (!prevSpans.Next())
                    {
                        InSameDoc = false;
                        More      = false;
                        break; // Check remaining subSpans for final match.
                    }
                    else if (MatchDoc != prevSpans.Doc())
                    {
                        InSameDoc = false; // The last subSpans is not advanced here.
                        break;             // Check remaining subSpans for last match in this document.
                    }
                    else
                    {
                        int ppStart = prevSpans.Start();
                        int ppEnd   = prevSpans.End(); // Cannot avoid invoking .end()
                        if (!DocSpansOrdered(ppStart, ppEnd, lastStart, lastEnd))
                        {
                            break; // Check remaining subSpans.
                        } // prevSpans still before (lastStart, lastEnd)
                        else
                        {
                            // Accept the new position and replace the remembered payload with its payload.
                            prevStart = ppStart;
                            prevEnd   = ppEnd;
                            if (CollectPayloads && prevSpans.PayloadAvailable)
                            {
                                var payload = prevSpans.Payload;
                                possiblePayload = new List <byte[]>(payload.Count);
                                possiblePayload.AddRange(payload);
                            }
                        }
                    }
                }

                // Merge the payload remembered for the accepted position into the candidate set.
                if (CollectPayloads && possiblePayload != null)
                {
                    possibleMatchPayloads.UnionWith(possiblePayload);
                }

                Debug.Assert(prevStart <= MatchStart);
                if (MatchStart > prevEnd) // Only non overlapping spans add to slop.
                {
                    matchSlop += (MatchStart - prevEnd);
                }

                /* Do not break on (matchSlop > allowedSlop) here to make sure
                 * that subSpans[0] is advanced after the match, if any.
                 */
                // Extend the match to the left and make this position the bound for the next subSpans.
                MatchStart = prevStart;
                lastStart  = prevStart;
                lastEnd    = prevEnd;
            }

            bool match = matchSlop <= AllowedSlop;

            // Candidate payloads are only published when the match is within the allowed slop.
            if (CollectPayloads && match && possibleMatchPayloads.Count > 0)
            {
                MatchPayload.AddRange(possibleMatchPayloads);
            }

            return(match); // ordered and allowed slop
        }
예제 #15
0
 /// <summary>
 /// Formats <paramref name="span"/> by passing its document, start and end to the
 /// three-argument overload of <c>s</c>.
 /// </summary>
 /// <param name="span">The span whose current position is formatted.</param>
 /// <returns>The string produced by the three-argument overload.</returns>
 public virtual string s(Spans span)
 {
     int docId = span.Doc();
     int startPos = span.Start();
     int endPos = span.End();
     return s(docId, startPos, endPos);
 }
예제 #16
0
        /// <summary>
        /// Checks the exact sequence of spans returned by unordered <c>SpanNearQuery</c>
        /// instances over the terms "u1" and "u2": first a flat query with slop 0, then a
        /// query that nests the flat one next to "u2" with slop 1.
        /// </summary>
        public virtual void  TestSpanNearUnOrdered()
        {
            //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test

            // Each row is { doc, start, end }, listed in the order the spans must be returned.
            int[][] flatExpected = new int[][]
            {
                new int[] { 4, 1, 3 },
                new int[] { 5, 2, 4 },
                new int[] { 8, 2, 4 },
                new int[] { 9, 0, 2 },
                new int[] { 10, 0, 2 }
            };

            SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { MakeSpanTermQuery("u1"), MakeSpanTermQuery("u2") }, 0, false);
            Spans spans = snq.GetSpans(searcher.GetIndexReader());

            foreach (int[] expected in flatExpected)
            {
                Assert.IsTrue(spans.Next(), "Does not have next and it should");
                Assert.AreEqual(expected[0], spans.Doc(), "doc");
                Assert.AreEqual(expected[1], spans.Start(), "start");
                Assert.AreEqual(expected[2], spans.End(), "end");
            }
            Assert.IsTrue(spans.Next() == false, "Has next and it shouldn't: " + spans.Doc());

            // Unordered spans can be subsets, so the same document may appear more than once.
            int[][] nestedExpected = new int[][]
            {
                new int[] { 4, 0, 3 },
                new int[] { 4, 1, 3 },
                new int[] { 5, 0, 4 },
                new int[] { 5, 2, 4 },
                new int[] { 8, 0, 4 },
                new int[] { 8, 2, 4 },
                new int[] { 9, 0, 2 },
                new int[] { 9, 0, 4 },
                new int[] { 10, 0, 2 }
            };

            SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[] { MakeSpanTermQuery("u1"), MakeSpanTermQuery("u2") }, 0, false);

            snq = new SpanNearQuery(new SpanQuery[] { u1u2, MakeSpanTermQuery("u2") }, 1, false);
            spans = snq.GetSpans(searcher.GetIndexReader());

            foreach (int[] expected in nestedExpected)
            {
                Assert.IsTrue(spans.Next(), "Does not have next and it should");
                Assert.AreEqual(expected[0], spans.Doc(), "doc");
                Assert.AreEqual(expected[1], spans.Start(), "start");
                Assert.AreEqual(expected[2], spans.End(), "end");
            }

            Assert.IsTrue(spans.Next() == false, "Has next and it shouldn't");
        }
예제 #17
0
        /// <summary>The subSpans are ordered in the same doc, so there is a possible match.
        /// Compute the slop while making the match as short as possible by advancing
        /// all subSpans except the last one in reverse order.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the accumulated slop does not exceed <c>allowedSlop</c>.
        /// </returns>
        private bool ShrinkToAfterShortestMatch()
        {
            // The last subSpans is never advanced here; its position seeds the match bounds.
            matchStart = subSpans[subSpans.Length - 1].Start();
            matchEnd   = subSpans[subSpans.Length - 1].End();
            // Dictionary used as a set: every payload array is stored as both key and value.
            System.Collections.Generic.Dictionary <byte[], byte[]> possibleMatchPayloads = new System.Collections.Generic.Dictionary <byte[], byte[]>();
            if (subSpans[subSpans.Length - 1].IsPayloadAvailable())
            {
                System.Collections.Generic.ICollection <byte[]> payload = subSpans[subSpans.Length - 1].GetPayload();
                foreach (byte[] pl in payload)
                {
                    if (!possibleMatchPayloads.ContainsKey(pl))
                    {
                        possibleMatchPayloads.Add(pl, pl);
                    }
                }
            }

            // Payload of the most recent accepted position of the subSpans being shrunk.
            // NOTE(review): declared outside the loop and never reset, so when a later
            // prevSpans has no payload the previous list is merged again below; the
            // ContainsKey guard makes that a no-op, but confirm it is intended.
            System.Collections.Generic.List <byte[]> possiblePayload = null;

            int matchSlop = 0;
            int lastStart = matchStart;
            int lastEnd   = matchEnd;

            // Walk the remaining subSpans from the second-to-last down to the first.
            for (int i = subSpans.Length - 2; i >= 0; i--)
            {
                Spans prevSpans = subSpans[i];
                if (collectPayloads && prevSpans.IsPayloadAvailable())
                {
                    System.Collections.Generic.ICollection <byte[]> payload = prevSpans.GetPayload();
                    possiblePayload = new System.Collections.Generic.List <byte[]>(payload.Count);
                    possiblePayload.AddRange(payload);
                }

                // Position of prevSpans before it is advanced; replaced below by every
                // later position that is still ordered before (lastStart, lastEnd).
                int prevStart = prevSpans.Start();
                int prevEnd   = prevSpans.End();
                while (true)
                {
                    // Advance prevSpans until after (lastStart, lastEnd)
                    if (!prevSpans.Next())
                    {
                        inSameDoc = false;
                        more      = false;
                        break; // Check remaining subSpans for final match.
                    }
                    else if (matchDoc != prevSpans.Doc())
                    {
                        inSameDoc = false; // The last subSpans is not advanced here.
                        break;             // Check remaining subSpans for last match in this document.
                    }
                    else
                    {
                        int ppStart = prevSpans.Start();
                        int ppEnd   = prevSpans.End(); // Cannot avoid invoking .end()
                        if (!DocSpansOrdered(ppStart, ppEnd, lastStart, lastEnd))
                        {
                            break; // Check remaining subSpans.
                        }
                        else
                        {
                            // prevSpans still before (lastStart, lastEnd)
                            // Accept the new position and replace the remembered payload with its payload.
                            prevStart = ppStart;
                            prevEnd   = ppEnd;
                            if (collectPayloads && prevSpans.IsPayloadAvailable())
                            {
                                System.Collections.Generic.ICollection <byte[]> payload = prevSpans.GetPayload();
                                possiblePayload = new System.Collections.Generic.List <byte[]>(payload.Count);
                                possiblePayload.AddRange(payload);
                            }
                        }
                    }
                }

                // Merge the payload remembered for the accepted position into the candidate set.
                if (collectPayloads && possiblePayload != null)
                {
                    foreach (byte[] pl in possiblePayload)
                    {
                        if (!possibleMatchPayloads.ContainsKey(pl))
                        {
                            possibleMatchPayloads.Add(pl, pl);
                        }
                    }
                }

                System.Diagnostics.Debug.Assert(prevStart <= matchStart);
                if (matchStart > prevEnd)
                {
                    // Only non overlapping spans add to slop.
                    matchSlop += (matchStart - prevEnd);
                }

                /* Do not break on (matchSlop > allowedSlop) here to make sure
                 * that subSpans[0] is advanced after the match, if any.
                 */
                // Extend the match to the left and make this position the bound for the next subSpans.
                matchStart = prevStart;
                lastStart  = prevStart;
                lastEnd    = prevEnd;
            }

            bool match = matchSlop <= allowedSlop;

            // Candidate payloads are only published when the match is within the allowed slop.
            if (collectPayloads && match && possibleMatchPayloads.Count > 0)
            {
                matchPayload.AddRange(possibleMatchPayloads.Keys);
            }

            return(match); // ordered and allowed slop
        }
예제 #18
0
 /// <summary>Returns the end position reported by the wrapped <c>Spans</c>.</summary>
 /// <returns>The value of <c>Spans.End()</c>.</returns>
 public override int End()
 // TODO: Remove warning after API has been finalized
 {
     int endPosition = Spans.End();
     return endPosition;
 }
예제 #19
0
 /// <summary>Returns the end position reported by the wrapped <c>spans</c>.</summary>
 /// <returns>The value of <c>spans.End()</c>.</returns>
 public override int End()
 {
     int endPosition = spans.End();
     return endPosition;
 }
예제 #20
0
        /// <summary>
        /// Exhausts <paramref name="spans"/> and verifies both the total number of spans and
        /// the number of payloads reported for each span.
        /// </summary>
        /// <param name="spans">The spans to iterate until <c>Next()</c> returns false.</param>
        /// <param name="numSpans">Expected total number of spans.</param>
        /// <param name="numPayloads">Expected payload count per span, indexed in iteration order.</param>
        private void  CheckSpans(Spans spans, int numSpans, int[] numPayloads)
        {
            int cnt = 0;

            while (spans.Next())
            {
                if (DEBUG)
                {
                    System.Console.Out.WriteLine("\nSpans Dump --");
                }

                if (spans.IsPayloadAvailable())
                {
                    System.Collections.Generic.ICollection <byte[]> payload = spans.GetPayload();
                    if (DEBUG)
                    {
                        System.Console.Out.WriteLine("payloads for span:" + payload.Count);
                    }

                    // foreach disposes the enumerator, unlike the hand-rolled MoveNext loop.
                    foreach (byte[] bytes in payload)
                    {
                        if (DEBUG)
                        {
                            System.Console.Out.WriteLine("doc:" + spans.Doc() + " s:" + spans.Start() + " e:" + spans.End() + " " + System.Text.Encoding.UTF8.GetString(bytes));
                        }
                    }

                    Assert.AreEqual(numPayloads[cnt], payload.Count);
                }
                else if (numPayloads.Length > 0)
                {
                    // The failure message indexes numPayloads, so it may only be built once the
                    // array is known to be non-empty. Building it unconditionally threw
                    // IndexOutOfRangeException for an empty array before the assertion could run.
                    Assert.IsFalse(numPayloads[cnt] > 0, "Expected spans:" + numPayloads[cnt] + " found: 0");
                }

                cnt++;
            }

            Assert.AreEqual(numSpans, cnt);
        }
예제 #21
0
 /// <summary>
 /// Formats <paramref name="span"/> by passing its document, start and end to the
 /// three-argument overload of <c>s</c>.
 /// </summary>
 /// <param name="span">The span whose current position is formatted.</param>
 /// <returns>The string produced by the three-argument overload.</returns>
 public virtual string s(Spans span)
 {
     int docId = span.Doc();
     int startPos = span.Start();
     int endPos = span.End();
     return s(docId, startPos, endPos);
 }