Beispiel #1
0
        /// <summary>
        /// Builds a <see cref="FieldFragList"/> containing at most one entry that spans
        /// offsets <c>0</c> through <see cref="int.MaxValue"/> and carries the phrases
        /// enumerated from <paramref name="fieldPhraseList"/>.
        /// </summary>
        /// <param name="fieldPhraseList">the phrase list whose <c>PhraseList</c> is enumerated</param>
        /// <param name="fragCharSize">the size passed to the <c>SimpleFieldFragList</c> constructor</param>
        /// <returns>
        /// the new frag list; it has no entry added when no phrase was collected
        /// </returns>
        public virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList,
                                                         int fragCharSize)
        {
            FieldFragList result = new SimpleFieldFragList(fragCharSize);
            IList<WeightedPhraseInfo> collected = new JCG.List<WeightedPhraseInfo>();

            using IEnumerator<WeightedPhraseInfo> cursor = fieldPhraseList.PhraseList.GetEnumerator();

            // Gather phrases in enumeration order; the first null element ends the scan.
            while (cursor.MoveNext())
            {
                WeightedPhraseInfo current = cursor.Current;
                if (current is null)
                {
                    break;
                }

                collected.Add(current);
            }

            // Only register the single catch-all fragment when there is something to put in it.
            if (collected.Count > 0)
            {
                result.Add(0, int.MaxValue, collected);
            }

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// A predicate to decide if the given <see cref="WeightedPhraseInfo"/> should be
        /// accepted as a highlighted phrase or if it should be discarded.
        /// <para/>
        /// The default implementation discards phrases that are composed of more than one term
        /// and where the matchLength exceeds the fragment character size.
        /// </summary>
        /// <param name="info">the phrase info to accept</param>
        /// <param name="matchLength">the match length of the current phrase</param>
        /// <param name="fragCharSize">the configured fragment character size</param>
        /// <returns><c>true</c> if this phrase info should be accepted as a highlight phrase</returns>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is <c>null</c>.</exception>
        protected virtual bool AcceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize)
        {
            // LUCENENET specific - added guard clause to check for null
            if (info is null)
            {
                // Report the offending parameter name, not the name of its type.
                throw new ArgumentNullException(nameof(info));
            }

            return info.TermsOffsets.Count <= 1 || matchLength <= fragCharSize;
        }
        /// <summary>
        /// Walks the phrases of <paramref name="fieldPhraseList"/> in enumeration order, groups
        /// consecutive phrases into fragments and adds each non-empty group to
        /// <paramref name="fieldFragList"/> together with a start/end span.
        /// </summary>
        /// <param name="fieldPhraseList">the phrase list whose <c>PhraseList</c> is enumerated</param>
        /// <param name="fieldFragList">the list that receives the fragments; the same instance is returned</param>
        /// <param name="fragCharSize">the fragment character size; must not be smaller than <c>minFragCharSize</c></param>
        /// <returns><paramref name="fieldFragList"/>, after the fragments have been added</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="fragCharSize"/> is smaller than <c>minFragCharSize</c>.
        /// </exception>
        protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
        {
            if (fragCharSize < minFragCharSize)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher.");
            }

            // Scratch list holding the phrases of the fragment currently being built;
            // it is cleared and reused for every fragment.
            List <WeightedPhraseInfo> wpil = new List <WeightedPhraseInfo>();

            // NOTE(review): IteratorQueue is defined elsewhere. From its use here, Top() appears to
            // peek at the next phrase (null when exhausted) and RemoveTop() to consume and return
            // that same phrase - confirm against its implementation.
            using (IteratorQueue <WeightedPhraseInfo> queue = new IteratorQueue <WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator()))
            {
                WeightedPhraseInfo phraseInfo = null;
                // End offset of the last fragment added; the next fragment may not start before it.
                int startOffset = 0;
                while ((phraseInfo = queue.Top()) != null)
                {
                    // if the phrase violates the border of previous fragment, discard it and try next phrase
                    if (phraseInfo.StartOffset < startOffset)
                    {
                        queue.RemoveTop();
                        continue;
                    }

                    wpil.Clear();
                    int currentPhraseStartOffset = phraseInfo.StartOffset;
                    int currentPhraseEndOffset   = phraseInfo.EndOffset;
                    // Provisional span: start "margin" characters before the first phrase (but not
                    // before the previous fragment's end) and extend at least fragCharSize characters,
                    // or to the end of the first phrase if that is further.
                    int spanStart = Math.Max(currentPhraseStartOffset - margin, startOffset);
                    int spanEnd   = Math.Max(currentPhraseEndOffset, spanStart + fragCharSize);
                    // The first phrase is always consumed from the queue, whether or not it is accepted.
                    if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                    {
                        wpil.Add(phraseInfo);
                    }
                    while ((phraseInfo = queue.Top()) != null)
                    { // pull until we crossed the current spanEnd
                        if (phraseInfo.EndOffset <= spanEnd)
                        {
                            // The end offset is advanced before the accept check, so it moves even
                            // when the phrase is then rejected. The match length passed to
                            // AcceptPhrase runs from the first phrase's start to this phrase's end.
                            currentPhraseEndOffset = phraseInfo.EndOffset;
                            if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                            {
                                wpil.Add(phraseInfo);
                            }
                        }
                        else
                        {
                            break;
                        }
                    }
                    // Every candidate was rejected: nothing to add. The rejected phrases have
                    // already been removed from the queue and startOffset is left unchanged.
                    if (wpil.Count == 0)
                    {
                        continue;
                    }

                    int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
                    // now recalculate the start and end position to "center" the result
                    int newMargin = Math.Max(0, (fragCharSize - matchLen) / 2); // matchLen can be > fragCharSize; clamp to 0 so the margin is never negative
                    spanStart = currentPhraseStartOffset - newMargin;
                    // Never reach back into the previous fragment.
                    if (spanStart < startOffset)
                    {
                        spanStart = startOffset;
                    }
                    // whatever is bigger here we grow this out
                    spanEnd     = spanStart + Math.Max(matchLen, fragCharSize);
                    // The next fragment must begin at or after the end of this one.
                    startOffset = spanEnd;
                    fieldFragList.Add(spanStart, spanEnd, wpil);
                }
            }
            return(fieldFragList);
        }
 /// <summary>
 /// Decides whether a <see cref="WeightedPhraseInfo"/> is kept as a highlighted phrase
 /// or dropped.
 /// <para/>
 /// By default a phrase is dropped only when it consists of more than one term and its
 /// match length is larger than the fragment character size.
 /// </summary>
 /// <param name="info">the phrase info being examined</param>
 /// <param name="matchLength">the match length of the current phrase</param>
 /// <param name="fragCharSize">the configured fragment character size</param>
 /// <returns><c>true</c> when the phrase info should be kept as a highlight phrase</returns>
 protected virtual bool AcceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize)
 {
     // Phrases with at most one term offset are always kept, regardless of length.
     if (info.TermsOffsets.Count <= 1)
     {
         return true;
     }

     // Multi-term phrases are kept only when the match does not exceed the fragment size.
     return matchLength <= fragCharSize;
 }