public virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
        {
            // Builds a frag list holding at most one fragment: every phrase of
            // fieldPhraseList (up to the first null entry, if any) is gathered
            // into a single fragment spanning offsets 0..int.MaxValue.
            FieldFragList result = new SimpleFieldFragList(fragCharSize);
            List<WeightedPhraseInfo> collected = new List<WeightedPhraseInfo>();

            foreach (WeightedPhraseInfo candidate in fieldPhraseList.PhraseList)
            {
                // A null entry ends the collection early; later entries are ignored.
                if (candidate == null)
                {
                    break;
                }

                collected.Add(candidate);
            }

            // No fragment is registered when nothing was collected.
            if (collected.Count > 0)
            {
                result.Add(0, int.MaxValue, collected);
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// A predicate to decide if the given <see cref="WeightedPhraseInfo"/> should be
        /// accepted as a highlighted phrase or if it should be discarded.
        /// <para/>
        /// The default implementation discards phrases that are composed of more than one term
        /// and where the matchLength exceeds the fragment character size.
        /// </summary>
        /// <param name="info">the phrase info to accept</param>
        /// <param name="matchLength">the match length of the current phrase</param>
        /// <param name="fragCharSize">the configured fragment character size</param>
        /// <returns><c>true</c> if this phrase info should be accepted as a highlighted phrase</returns>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is <c>null</c>.</exception>
        protected virtual bool AcceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize)
        {
            // LUCENENET specific - added guard clause to check for null
            if (info is null)
            {
                // ParamName must identify the offending parameter, not its type,
                // so the exception points the caller at the right argument.
                throw new ArgumentNullException(nameof(info));
            }

            // Accept when the phrase has at most one term offset, or when the
            // match fits within the fragment size.
            return info.TermsOffsets.Count <= 1 || matchLength <= fragCharSize;
        }
Esempio n. 3
0
        public void TestWeightedPhraseInfoComparisonConsistency()
        {
            // Fixtures. "firstTwin" is built from the same arguments as "first";
            // "firstVariant" differs from "first" only in the third argument.
            WeightedPhraseInfo first = newInfo(0, 0, 1);
            WeightedPhraseInfo second = newInfo(1, 2, 1);
            WeightedPhraseInfo third = newInfo(2, 3, 1);
            WeightedPhraseInfo firstTwin = newInfo(0, 0, 1);
            WeightedPhraseInfo firstVariant = newInfo(0, 0, 2);

            // Each instance must be consistently equal to itself.
            WeightedPhraseInfo[] all = { first, second, third, firstTwin, firstVariant };
            foreach (WeightedPhraseInfo info in all)
            {
                assertConsistentEquals(info, info);
            }

            // Instances built from identical arguments must be consistently equal.
            assertConsistentEquals(first, firstTwin);

            // Each pair is { smaller, larger }; checked in the listed order.
            WeightedPhraseInfo[][] orderedPairs =
            {
                new[] { first, second },
                new[] { second, third },
                new[] { first, third },
                new[] { first, firstVariant },
                new[] { firstVariant, second },
                new[] { firstVariant, third },
                new[] { firstTwin, second },
                new[] { firstTwin, third },
                new[] { firstTwin, firstVariant },
            };
            foreach (WeightedPhraseInfo[] pair in orderedPairs)
            {
                assertConsistentLessThan(pair[0], pair[1]);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Partitions the phrases of <paramref name="fieldPhraseList"/> into non-overlapping
        /// fragments and adds each fragment to <paramref name="fieldFragList"/>.
        /// <para/>
        /// Phrases are consumed in queue order. Each fragment starts at the first phrase that
        /// begins at or after the end of the previous fragment, absorbs following phrases that
        /// end within the provisional span, and is then re-centered around the matched range.
        /// Phrases rejected by <c>AcceptPhrase</c> are consumed but not added to the fragment.
        /// </summary>
        /// <param name="fieldPhraseList">source of the phrases to group into fragments</param>
        /// <param name="fieldFragList">the list that receives the fragments; also the return value</param>
        /// <param name="fragCharSize">the fragment character size; must be at least <c>minFragCharSize</c></param>
        /// <returns>the same <paramref name="fieldFragList"/> instance that was passed in</returns>
        /// <exception cref="ArgumentException">if <paramref name="fragCharSize"/> is smaller than <c>minFragCharSize</c></exception>
        protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
        {
            if (fragCharSize < minFragCharSize)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher.");
            }

            // Scratch list reused for every fragment (cleared at the start of each one).
            // NOTE(review): the same instance is handed to fieldFragList.Add each time;
            // presumably Add copies its contents - confirm against FieldFragList.
            List <WeightedPhraseInfo> wpil = new List <WeightedPhraseInfo>();

            // NOTE(review): Top() is treated as a peek that yields null once the queue is
            // exhausted, and RemoveTop() as returning the element it removes - confirm
            // against IteratorQueue.
            using (IteratorQueue <WeightedPhraseInfo> queue = new IteratorQueue <WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator()))
            {
                WeightedPhraseInfo phraseInfo = null;
                // End offset of the most recently emitted fragment; the next fragment may not start before it.
                int startOffset = 0;
                while ((phraseInfo = queue.Top()) != null)
                {
                    // if the phrase violates the border of previous fragment, discard it and try next phrase
                    if (phraseInfo.StartOffset < startOffset)
                    {
                        queue.RemoveTop();
                        continue;
                    }

                    wpil.Clear();
                    int currentPhraseStartOffset = phraseInfo.StartOffset;
                    int currentPhraseEndOffset   = phraseInfo.EndOffset;
                    // Provisional span: back off by margin (but never before the previous fragment),
                    // then extend to at least fragCharSize or to the end of the first phrase.
                    int spanStart = Math.Max(currentPhraseStartOffset - margin, startOffset);
                    int spanEnd   = Math.Max(currentPhraseEndOffset, spanStart + fragCharSize);
                    // The first phrase is removed from the queue whether or not it is accepted.
                    if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                    {
                        wpil.Add(phraseInfo);
                    }
                    while ((phraseInfo = queue.Top()) != null)
                    { // pull until we crossed the current spanEnd
                        if (phraseInfo.EndOffset <= spanEnd)
                        {
                            // The end offset is advanced before the acceptance check, so a rejected
                            // phrase still widens the match range used below.
                            currentPhraseEndOffset = phraseInfo.EndOffset;
                            if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                            {
                                wpil.Add(phraseInfo);
                            }
                        }
                        else
                        {
                            // This phrase stays in the queue and is reconsidered by the outer loop.
                            break;
                        }
                    }
                    // Nothing was accepted: emit no fragment and leave startOffset unchanged.
                    if (wpil.Count == 0)
                    {
                        continue;
                    }

                    int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
                    // now recalculate the start and end position to "center" the result
                    int newMargin = Math.Max(0, (fragCharSize - matchLen) / 2); // matchLen can be > fragCharSize; clamping at 0 keeps the margin non-negative (prevents IAOOB)
                    spanStart = currentPhraseStartOffset - newMargin;
                    // Never let the centered fragment reach back into the previous fragment.
                    if (spanStart < startOffset)
                    {
                        spanStart = startOffset;
                    }
                    // whatever is bigger here we grow this out
                    spanEnd     = spanStart + Math.Max(matchLen, fragCharSize);
                    // The next fragment must begin at or after the end of this one.
                    startOffset = spanEnd;
                    fieldFragList.Add(spanStart, spanEnd, wpil);
                }
            }
            return(fieldFragList);
        }
Esempio n. 5
0
 /// <summary>
 /// Decides whether the given <see cref="WeightedPhraseInfo"/> is kept as a
 /// highlighted phrase or dropped.
 /// <para/>
 /// By default a phrase is dropped only when it consists of more than one term
 /// and its match length is larger than the fragment character size.
 /// </summary>
 /// <param name="info">the phrase info under consideration</param>
 /// <param name="matchLength">the match length of the current phrase</param>
 /// <param name="fragCharSize">the configured fragment character size</param>
 /// <returns><c>true</c> if this phrase info should be accepted as a highlighted phrase</returns>
 protected virtual bool AcceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize)
 {
     // Phrases with at most one term offset are accepted regardless of length.
     if (info.TermsOffsets.Count <= 1)
     {
         return true;
     }

     // Multi-term phrases are accepted only when they fit in the fragment.
     return matchLength <= fragCharSize;
 }