Esempio n. 1
0
        /// <summary>
        /// Produces the single best fragment (snippet) for one field of a document.
        /// The fragment list is built first, then handed to <paramref name="fragmentsBuilder"/>
        /// which renders the final string.
        /// </summary>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <param name="reader"><see cref="IndexReader"/> of the index</param>
        /// <param name="docId">id of the document to highlight</param>
        /// <param name="fieldName">name of the document field to highlight</param>
        /// <param name="fragCharSize">length of a fragment, in chars</param>
        /// <param name="fragListBuilder"><see cref="IFragListBuilder"/> used to build the fragment list</param>
        /// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> used to render the fragment</param>
        /// <param name="preTags">tags emitted before a highlighted term</param>
        /// <param name="postTags">tags emitted after a highlighted term</param>
        /// <param name="encoder">encoder applied to the generated text</param>
        /// <returns>the best fragment (snippet) string, exactly as returned by <paramref name="fragmentsBuilder"/></returns>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        public string GetBestFragment(FieldQuery fieldQuery, IndexReader reader, int docId,
                                      string fieldName, int fragCharSize,
                                      IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
                                      string[] preTags, string[] postTags, IEncoder encoder)
        {
            // Step 1: gather the candidate fragments for this field.
            FieldFragList candidates = GetFieldFragList(
                fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);

            // Step 2: let the fragments builder turn the candidates into one snippet.
            string bestFragment = fragmentsBuilder.CreateFragment(
                reader, docId, fieldName, candidates, preTags, postTags, encoder);
            return bestFragment;
        }
Esempio n. 2
0
        /// <summary>
        /// Returns the best fragments. Matches are collected from <paramref name="matchedFields"/> and rendered
        /// as fragments against the text of <paramref name="storedField"/>.
        /// <para/>
        /// The highlighting may not make sense if <paramref name="matchedFields"/> yields matches whose offsets
        /// do not correspond to features in <paramref name="storedField"/>, and an
        /// <see cref="IndexOutOfRangeException"/> is thrown outright if those offsets fall outside of
        /// <paramref name="storedField"/>. It is therefore advisable that every field in
        /// <paramref name="matchedFields"/> shares the same source as <paramref name="storedField"/>, or is at
        /// least a prefix of it.
        /// </summary>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <param name="reader"><see cref="IndexReader"/> of the index</param>
        /// <param name="docId">id of the document to highlight</param>
        /// <param name="storedField">field of the document that stores the text</param>
        /// <param name="matchedFields">fields of the document to scan for matches</param>
        /// <param name="fragCharSize">length of a fragment, in chars</param>
        /// <param name="maxNumFragments">maximum number of fragments</param>
        /// <param name="fragListBuilder"><see cref="IFragListBuilder"/> used to build the fragment list</param>
        /// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> used to render the fragments</param>
        /// <param name="preTags">tags emitted before a highlighted term</param>
        /// <param name="postTags">tags emitted after a highlighted term</param>
        /// <param name="encoder">encoder applied to the generated text</param>
        /// <returns>
        /// created fragments or null when no fragments created.
        /// The array may hold fewer than <paramref name="maxNumFragments"/> entries.
        /// </returns>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
                                         string storedField, ISet <string> matchedFields, int fragCharSize, int maxNumFragments,
                                         IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
                                         string[] preTags, string[] postTags, IEncoder encoder)
        {
            // Matches are scanned across all matched fields ...
            FieldFragList candidates = GetFieldFragList(
                fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize);

            // ... but the snippets themselves are cut from the stored field's text.
            string[] bestFragments = fragmentsBuilder.CreateFragments(
                reader, docId, storedField, candidates, maxNumFragments, preTags, postTags, encoder);
            return bestFragments;
        }
Esempio n. 3
0
 /// <summary>
 /// Creates a single fragment by requesting at most one fragment from
 /// <c>CreateFragments</c> and returning its first element.
 /// </summary>
 /// <param name="reader"><see cref="IndexReader"/> of the index</param>
 /// <param name="docId">id of the document to highlight</param>
 /// <param name="fieldName">name of the document field to highlight</param>
 /// <param name="fieldFragList">fragment list the snippet is built from</param>
 /// <param name="preTags">tags emitted before a highlighted term</param>
 /// <param name="postTags">tags emitted after a highlighted term</param>
 /// <param name="encoder">encoder applied to the generated text</param>
 /// <returns>the first created fragment, or <c>null</c> when none was created</returns>
 public virtual string CreateFragment(IndexReader reader, int docId,
                                      string fieldName, FieldFragList fieldFragList, string[] preTags, string[] postTags,
                                      IEncoder encoder)
 {
     // Ask for exactly one fragment; the array may still come back null or empty.
     string[] single = CreateFragments(reader, docId, fieldName, fieldFragList, 1,
                                       preTags, postTags, encoder);

     bool hasFragment = single != null && single.Length > 0;
     return hasFragment ? single[0] : null;
 }
Esempio n. 4
0
        /// <summary>
        /// Creates up to <paramref name="maxNumFragments"/> fragments for a field of a document.
        /// </summary>
        /// <param name="reader"><see cref="IndexReader"/> of the index</param>
        /// <param name="docId">id of the document to highlight</param>
        /// <param name="fieldName">name of the document field to highlight</param>
        /// <param name="fieldFragList">fragment list whose <c>FragInfos</c> are rendered</param>
        /// <param name="maxNumFragments">upper bound on the number of fragments; must not be negative</param>
        /// <param name="preTags">tags emitted before a highlighted term</param>
        /// <param name="postTags">tags emitted after a highlighted term</param>
        /// <param name="encoder">encoder applied to the generated text</param>
        /// <returns>
        /// the created fragments, or <c>null</c> when the document has no values for
        /// <paramref name="fieldName"/>. The array can be shorter than <paramref name="maxNumFragments"/>.
        /// </returns>
        /// <exception cref="ArgumentException">If <paramref name="maxNumFragments"/> is negative</exception>
        public virtual string[] CreateFragments(IndexReader reader, int docId,
                                                string fieldName, FieldFragList fieldFragList, int maxNumFragments,
                                                string[] preTags, string[] postTags, IEncoder encoder)
        {
            if (maxNumFragments < 0)
            {
                throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
            }

            IList <WeightedFragInfo> candidates = fieldFragList.FragInfos;

            // Nothing to highlight when the field has no values in this document.
            Field[] fieldValues = GetFields(reader, docId, fieldName);
            if (fieldValues.Length == 0)
            {
                return null;
            }

            // Multi-valued fields are optionally processed per value before weighting.
            bool perValue = discreteMultiValueHighlighting && fieldValues.Length > 1;
            if (perValue)
            {
                candidates = DiscreteMultiValueHighlighting(candidates, fieldValues);
            }
            candidates = GetWeightedFragInfoList(candidates);

            // Never emit more fragments than requested, nor more than are available.
            int fragmentCount = Math.Min(maxNumFragments, candidates.Count);
            string[] rendered = new string[fragmentCount];

            // The scratch buffer and the cursor are shared by every MakeFragment call. The cursor is a
            // one-element array so the callee can mutate it (presumably the index of the next field
            // value to read; confirm against MakeFragment).
            StringBuilder scratch = new StringBuilder();
            int[] nextValueIndex = { 0 };

            for (int i = 0; i < fragmentCount; i++)
            {
                rendered[i] = MakeFragment(scratch, nextValueIndex, fieldValues, candidates[i],
                                           preTags, postTags, encoder);
            }
            return rendered;
        }
Esempio n. 5
0
        /// <summary>
        /// Walks the phrases of <paramref name="fieldPhraseList"/> in queue order, groups phrases that fit
        /// inside one fragment-sized span, and adds one fragment per group to <paramref name="fieldFragList"/>.
        /// Each fragment is re-centered around the phrases it contains and never starts before the end of
        /// the previously added fragment.
        /// </summary>
        /// <param name="fieldPhraseList">source of the weighted phrases (read through its <c>PhraseList</c>)</param>
        /// <param name="fieldFragList">fragment list that receives the created fragments</param>
        /// <param name="fragCharSize">the length (number of chars) of a fragment; must be at least <c>minFragCharSize</c></param>
        /// <returns>the same <paramref name="fieldFragList"/> instance that was passed in</returns>
        /// <exception cref="ArgumentException">If <paramref name="fragCharSize"/> is smaller than <c>minFragCharSize</c></exception>
        protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
        {
            if (fragCharSize < minFragCharSize)
            {
                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher.");
            }

            // Scratch list of the phrases accepted for the fragment currently being built; it is cleared
            // and reused for every fragment.
            // NOTE(review): the same list instance is handed to fieldFragList.Add each time, so this relies
            // on Add not retaining a reference to it; confirm against the FieldFragList implementation.
            List <WeightedPhraseInfo> wpil = new List <WeightedPhraseInfo>();

            using (IteratorQueue <WeightedPhraseInfo> queue = new IteratorQueue <WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator()))
            {
                WeightedPhraseInfo phraseInfo = null;
                // End offset of the last fragment added so far (0 until the first one is added).
                int startOffset = 0;
                while ((phraseInfo = queue.Top()) != null)
                {
                    // if the phrase violates the border of previous fragment, discard it and try next phrase
                    if (phraseInfo.StartOffset < startOffset)
                    {
                        queue.RemoveTop();
                        continue;
                    }

                    wpil.Clear();
                    int currentPhraseStartOffset = phraseInfo.StartOffset;
                    int currentPhraseEndOffset   = phraseInfo.EndOffset;
                    // Provisional span: start "margin" chars before the phrase (but not before the previous
                    // fragment's end) and extend to at least fragCharSize chars or the phrase end.
                    int spanStart = Math.Max(currentPhraseStartOffset - margin, startOffset);
                    int spanEnd   = Math.Max(currentPhraseEndOffset, spanStart + fragCharSize);
                    // The first phrase is always consumed from the queue; AcceptPhrase only decides whether
                    // it is recorded in the fragment.
                    if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                    {
                        wpil.Add(phraseInfo);
                    }
                    while ((phraseInfo = queue.Top()) != null)
                    { // pull until we crossed the current spanEnd
                        if (phraseInfo.EndOffset <= spanEnd)
                        {
                            // The match length passed to AcceptPhrase runs from the first phrase's start
                            // to this phrase's end. The end offset is advanced even if the phrase is
                            // then rejected.
                            currentPhraseEndOffset = phraseInfo.EndOffset;
                            if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                            {
                                wpil.Add(phraseInfo);
                            }
                        }
                        else
                        {
                            break;
                        }
                    }
                    // No phrase was accepted for this span: add no fragment and leave startOffset unchanged.
                    if (wpil.Count == 0)
                    {
                        continue;
                    }

                    int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
                    // now recalculate the start and end position to "center" the result
                    int newMargin = Math.Max(0, (fragCharSize - matchLen) / 2); // matchLen can exceed fragCharSize; clamp to 0 so the margin never goes negative
                    spanStart = currentPhraseStartOffset - newMargin;
                    // Never overlap the previously added fragment.
                    if (spanStart < startOffset)
                    {
                        spanStart = startOffset;
                    }
                    // whatever is bigger here we grow this out
                    spanEnd     = spanStart + Math.Max(matchLen, fragCharSize);
                    // The next fragment may not start before this one ends.
                    startOffset = spanEnd;
                    fieldFragList.Add(spanStart, spanEnd, wpil);
                }
            }
            return(fieldFragList);
        }
Esempio n. 6
0
 /// <summary>
 /// Creates a single fragment using this builder's own pre-tags and post-tags
 /// (<c>m_preTags</c>, <c>m_postTags</c>) and <c>NULL_ENCODER</c>.
 /// </summary>
 /// <param name="reader"><see cref="IndexReader"/> of the index</param>
 /// <param name="docId">id of the document to highlight</param>
 /// <param name="fieldName">name of the document field to highlight</param>
 /// <param name="fieldFragList">fragment list the snippet is built from</param>
 /// <returns>the fragment returned by the overload that takes explicit tags and an encoder</returns>
 public virtual string CreateFragment(IndexReader reader, int docId,
                                      string fieldName, FieldFragList fieldFragList)
 {
     // Delegate to the full overload, filling in the instance-level defaults.
     string fragment = CreateFragment(
         reader, docId, fieldName, fieldFragList,
         m_preTags, m_postTags, NULL_ENCODER);
     return fragment;
 }