/// <summary>
/// Orders two <c>Spans</c> instances by document, then by start position,
/// then by end position.
/// </summary>
/// <param name="o1">First operand; cast to <c>Spans</c>.</param>
/// <param name="o2">Second operand; cast to <c>Spans</c>.</param>
/// <returns>
/// true when <paramref name="o1"/> sorts strictly before <paramref name="o2"/>
/// under the (doc, start, end) ordering.
/// </returns>
public override bool LessThan(System.Object o1, System.Object o2)
{
    Spans left = (Spans) o1;
    Spans right = (Spans) o2;

    // Different documents: the document number alone decides.
    if (left.Doc() != right.Doc())
    {
        return left.Doc() < right.Doc();
    }

    // Same document, different start: the earlier start wins.
    if (left.Start() != right.Start())
    {
        return left.Start() < right.Start();
    }

    // Same document and same start: the shorter (earlier ending) span wins.
    return left.End() < right.End();
}
/// <summary>
/// Records the current document of <c>spans</c> in <c>doc</c> and accumulates
/// into <c>freq</c> the sloppy frequency of every match that <c>spans</c>
/// reports for that document, leaving <c>spans</c> positioned on the first
/// match of a later document (or exhausted).
/// </summary>
/// <returns>
/// false when <c>more</c> was already false on entry (nothing is touched);
/// true otherwise.
/// </returns>
public /*protected internal*/ virtual bool SetFreqCurrentDoc()
{
    if (!more)
    {
        return false;
    }

    doc = spans.Doc();
    freq = 0.0f;

    // The current match is always consumed before testing the exit condition.
    while (true)
    {
        int spanEnd = spans.End();
        int spanStart = spans.Start();
        freq += GetSimilarity().SloppyFreq(spanEnd - spanStart);

        more = spans.Next();
        if (!more || doc != spans.Doc())
        {
            break; // exhausted, or moved on to another document
        }
    }

    return true;
}
/// <summary>Returns the start position reported by the wrapped <c>spans</c>.</summary>
/// <returns>The value of <c>spans.Start()</c>.</returns>
public override int Start()
{
    int startPosition = spans.Start();
    return startPosition;
}
/// <summary>
/// The subSpans are ordered in the same doc, so there is a possible match.
/// Walks the subSpans from the second-to-last down to the first, advancing each
/// one for as long as it stays ordered before the span that follows it. This
/// makes the match as short as possible while the slop is summed up.
/// </summary>
/// <returns>
/// true when the accumulated slop does not exceed <c>allowedSlop</c>. When
/// <c>collectPayloads</c> is set and the result is true, the payloads gathered
/// along the way are appended to <c>matchPayload</c>.
/// </returns>
private bool ShrinkToAfterShortestMatch()
{
    Spans lastSpans = subSpans[subSpans.Length - 1];
    matchStart = lastSpans.Start();
    matchEnd = lastSpans.End();

    // Dictionary used as a set: each payload array is both key and value.
    System.Collections.Generic.Dictionary<byte[], byte[]> candidatePayloads =
        new System.Collections.Generic.Dictionary<byte[], byte[]>();
    if (lastSpans.IsPayloadAvailable())
    {
        System.Collections.Generic.ICollection<byte[]> payload = lastSpans.GetPayload();
        foreach (byte[] bytes in payload)
        {
            candidatePayloads[bytes] = bytes;
        }
    }

    System.Collections.Generic.List<byte[]> pendingPayload = null;
    int slopTotal = 0;
    int followStart = matchStart;
    int followEnd = matchEnd;

    for (int idx = subSpans.Length - 2; idx >= 0; idx--)
    {
        Spans current = subSpans[idx];
        if (collectPayloads && current.IsPayloadAvailable())
        {
            System.Collections.Generic.ICollection<byte[]> payload = current.GetPayload();
            pendingPayload = new System.Collections.Generic.List<byte[]>(payload.Count);
            pendingPayload.AddRange(payload);
        }

        int bestStart = current.Start();
        int bestEnd = current.End();

        // Advance current until it is after (followStart, followEnd).
        while (true)
        {
            if (!current.Next())
            {
                inSameDoc = false;
                more = false;
                break; // Check remaining subSpans for final match.
            }

            if (matchDoc != current.Doc())
            {
                inSameDoc = false; // The last subSpans is not advanced here.
                break; // Check remaining subSpans for last match in this document.
            }

            int nextStart = current.Start();
            int nextEnd = current.End(); // Cannot avoid invoking End() here.
            if (!DocSpansOrdered(nextStart, nextEnd, followStart, followEnd))
            {
                break; // Check remaining subSpans.
            }

            // current is still before (followStart, followEnd): remember this position.
            bestStart = nextStart;
            bestEnd = nextEnd;
            if (collectPayloads && current.IsPayloadAvailable())
            {
                System.Collections.Generic.ICollection<byte[]> payload = current.GetPayload();
                pendingPayload = new System.Collections.Generic.List<byte[]>(payload.Count);
                pendingPayload.AddRange(payload);
            }
        }

        if (collectPayloads && pendingPayload != null)
        {
            foreach (byte[] bytes in pendingPayload)
            {
                candidatePayloads[bytes] = bytes;
            }
        }

        System.Diagnostics.Debug.Assert(bestStart <= matchStart);

        // Only non overlapping spans add to slop.
        if (matchStart > bestEnd)
        {
            slopTotal += (matchStart - bestEnd);
        }

        /* Do not break on (slopTotal > allowedSlop) here to make sure
         * that subSpans[0] is advanced after the match, if any.
         */
        matchStart = bestStart;
        followStart = bestStart;
        followEnd = bestEnd;
    }

    bool withinSlop = slopTotal <= allowedSlop;
    if (collectPayloads && withinSlop && candidatePayloads.Count > 0)
    {
        matchPayload.AddRange(candidatePayloads.Keys);
    }

    return withinSlop; // ordered and allowed slop
}
/// <summary>Decides whether two Spans positioned in the same document are ordered.</summary>
/// <param name="spans1">The span expected to come first.</param>
/// <param name="spans2">The span expected to come second.</param>
/// <returns>
/// true iff <paramref name="spans1"/> starts before <paramref name="spans2"/>,
/// or both start at the same position and <paramref name="spans1"/> ends
/// before <paramref name="spans2"/>.
/// </returns>
internal static bool DocSpansOrdered(Spans spans1, Spans spans2)
{
    System.Diagnostics.Debug.Assert(spans1.Doc() == spans2.Doc(), "doc1 " + spans1.Doc() + " != doc2 " + spans2.Doc());

    int start1 = spans1.Start();
    int start2 = spans2.Start();

    // End() is only consulted on a start-position tie, so this does not
    // delegate to the (int, int, int, int) overload, which needs both ends.
    if (start1 != start2)
    {
        return start1 < start2;
    }

    return spans1.End() < spans2.End();
}
/// <summary>
/// Moves <c>includeSpans</c> forward to its next match that is not overlapped
/// by a match of <c>excludeSpans</c> in the same document, advancing
/// <c>excludeSpans</c> as needed along the way.
/// </summary>
/// <returns>
/// true when <c>includeSpans</c> is positioned on such a match; false when
/// <c>includeSpans</c> is exhausted.
/// </returns>
public override bool Next()
{
    // Step to the next include match, unless include is already exhausted.
    moreInclude = moreInclude && includeSpans.Next();

    while (moreInclude && moreExclude)
    {
        // Exclude is in an earlier document: skip it up to the include document.
        if (includeSpans.Doc() > excludeSpans.Doc())
        {
            moreExclude = excludeSpans.SkipTo(includeSpans.Doc());
        }

        // Drop exclude matches that end at or before the include match starts.
        while (moreExclude
               && includeSpans.Doc() == excludeSpans.Doc()
               && excludeSpans.End() <= includeSpans.Start())
        {
            moreExclude = excludeSpans.Next();
        }

        // The include match is rejected only when an exclude match in the same
        // document starts before the include match ends.
        bool intersects = moreExclude
                          && includeSpans.Doc() == excludeSpans.Doc()
                          && includeSpans.End() > excludeSpans.Start();
        if (!intersects)
        {
            break; // we found a match
        }

        moreInclude = includeSpans.Next(); // intersected: keep scanning
    }

    return moreInclude;
}