/// <summary>
/// Returns the best fragment for one field of a document. The fragment list is
/// obtained from <c>GetFieldFragList</c> and handed to
/// <paramref name="fragmentsBuilder"/>, which produces the fragment string.
/// </summary>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="fieldName">field of the document to highlight</param>
/// <param name="fragCharSize">the length (number of chars) of a fragment</param>
/// <param name="fragListBuilder"><see cref="IFragListBuilder"/> used to build the fragment list</param>
/// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> that creates the fragment from the list</param>
/// <param name="preTags">pre-tags used to highlight terms</param>
/// <param name="postTags">post-tags used to highlight terms</param>
/// <param name="encoder">encoder that generates the encoded text</param>
/// <returns>the best fragment (snippet) string</returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public string GetBestFragment(
    FieldQuery fieldQuery,
    IndexReader reader,
    int docId,
    string fieldName,
    int fragCharSize,
    IFragListBuilder fragListBuilder,
    IFragmentsBuilder fragmentsBuilder,
    string[] preTags,
    string[] postTags,
    IEncoder encoder)
{
    // The fragment list is computed as an argument, i.e. before the builder is invoked.
    return fragmentsBuilder.CreateFragment(
        reader,
        docId,
        fieldName,
        GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize),
        preTags,
        postTags,
        encoder);
}
/// <summary>
/// Returns the best fragments. Matches are scanned from <paramref name="matchedFields"/>
/// and turned into fragments against <paramref name="storedField"/>. The highlighting may
/// not make sense if <paramref name="matchedFields"/> has matches with offsets that don't
/// correspond to features in <paramref name="storedField"/>. It will outright throw an
/// <see cref="IndexOutOfRangeException"/> if <paramref name="matchedFields"/> produces
/// offsets outside of <paramref name="storedField"/>. It is therefore advisable that all
/// <paramref name="matchedFields"/> share the same source as <paramref name="storedField"/>,
/// or are at least a prefix of it.
/// </summary>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="storedField">field of the document that stores the text</param>
/// <param name="matchedFields">fields of the document to scan for matches</param>
/// <param name="fragCharSize">the length (number of chars) of a fragment</param>
/// <param name="maxNumFragments">maximum number of fragments</param>
/// <param name="fragListBuilder"><see cref="IFragListBuilder"/> used to build the fragment list</param>
/// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> that creates the fragments from the list</param>
/// <param name="preTags">pre-tags used to highlight terms</param>
/// <param name="postTags">post-tags used to highlight terms</param>
/// <param name="encoder">encoder that generates the encoded text</param>
/// <returns>
/// the created fragments, or <c>null</c> when no fragments were created.
/// The size of the array can be less than <paramref name="maxNumFragments"/>.
/// </returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public string[] GetBestFragments(
    FieldQuery fieldQuery,
    IndexReader reader,
    int docId,
    string storedField,
    ISet<string> matchedFields,
    int fragCharSize,
    int maxNumFragments,
    IFragListBuilder fragListBuilder,
    IFragmentsBuilder fragmentsBuilder,
    string[] preTags,
    string[] postTags,
    IEncoder encoder)
{
    // Matches come from matchedFields; the text itself is read from storedField by the builder.
    return fragmentsBuilder.CreateFragments(
        reader,
        docId,
        storedField,
        GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize),
        maxNumFragments,
        preTags,
        postTags,
        encoder);
}
/// <summary>
/// Creates a single fragment by asking <c>CreateFragments</c> for at most one
/// fragment and returning its first element.
/// </summary>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="fieldName">field of the document to highlight</param>
/// <param name="fieldFragList">fragment list to build the fragment from</param>
/// <param name="preTags">pre-tags used to highlight terms</param>
/// <param name="postTags">post-tags used to highlight terms</param>
/// <param name="encoder">encoder that generates the encoded text</param>
/// <returns>
/// the first created fragment, or <c>null</c> when <c>CreateFragments</c>
/// returned <c>null</c> or an empty array
/// </returns>
public virtual string CreateFragment(
    IndexReader reader,
    int docId,
    string fieldName,
    FieldFragList fieldFragList,
    string[] preTags,
    string[] postTags,
    IEncoder encoder)
{
    string[] created = CreateFragments(reader, docId, fieldName, fieldFragList, 1, preTags, postTags, encoder);
    bool hasFragment = created != null && created.Length > 0;
    return hasFragment ? created[0] : null;
}
/// <summary>
/// Creates up to <paramref name="maxNumFragments"/> fragment strings for a field of a
/// document from the fragment infos held by <paramref name="fieldFragList"/>.
/// </summary>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="fieldName">field whose values are loaded through <c>GetFields</c></param>
/// <param name="fieldFragList">fragment list supplying the fragment infos</param>
/// <param name="maxNumFragments">
/// upper bound on the number of fragments returned; must not be negative
/// (zero is accepted and yields an empty array when the field has values)
/// </param>
/// <param name="preTags">pre-tags used to highlight terms</param>
/// <param name="postTags">post-tags used to highlight terms</param>
/// <param name="encoder">encoder that generates the encoded text</param>
/// <returns>
/// the created fragments, or <c>null</c> when <c>GetFields</c> returns no values
/// for the field. The array can be shorter than <paramref name="maxNumFragments"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="maxNumFragments"/> is negative
/// </exception>
public virtual string[] CreateFragments(
    IndexReader reader,
    int docId,
    string fieldName,
    FieldFragList fieldFragList,
    int maxNumFragments,
    string[] preTags,
    string[] postTags,
    IEncoder encoder)
{
    if (maxNumFragments < 0)
    {
        // ArgumentOutOfRangeException derives from ArgumentException, so callers that
        // catch ArgumentException keep working while the offending parameter is named.
        throw new ArgumentOutOfRangeException(
            nameof(maxNumFragments),
            "maxNumFragments(" + maxNumFragments + ") must be positive number.");
    }

    IList<WeightedFragInfo> fragInfos = fieldFragList.FragInfos;
    Field[] values = GetFields(reader, docId, fieldName);
    if (values.Length == 0)
    {
        // Nothing stored for this field: signal "no fragments" with null.
        return null;
    }

    if (discreteMultiValueHighlighting && values.Length > 1)
    {
        fragInfos = DiscreteMultiValueHighlighting(fragInfos, values);
    }

    fragInfos = GetWeightedFragInfoList(fragInfos);

    int limitFragments = Math.Min(maxNumFragments, fragInfos.Count);
    List<string> fragments = new List<string>(limitFragments);

    // The buffer and the single-element index array are shared across all MakeFragment calls.
    // NOTE(review): presumably MakeFragment advances nextValueIndex[0] between calls - confirm.
    StringBuilder buffer = new StringBuilder();
    int[] nextValueIndex = { 0 };
    for (int n = 0; n < limitFragments; n++)
    {
        WeightedFragInfo fragInfo = fragInfos[n];
        fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder));
    }

    return fragments.ToArray();
}
/// <summary>
/// Walks the phrases of <paramref name="fieldPhraseList"/> in order, groups accepted
/// phrases into spans of roughly <paramref name="fragCharSize"/> characters, and adds
/// each span to <paramref name="fieldFragList"/>.
/// </summary>
/// <param name="fieldPhraseList">source of the weighted phrases</param>
/// <param name="fieldFragList">fragment list that receives the spans; also the return value</param>
/// <param name="fragCharSize">
/// the length (number of chars) of a fragment; must be at least <c>minFragCharSize</c>
/// </param>
/// <returns>the same <paramref name="fieldFragList"/> instance that was passed in</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="fragCharSize"/> is smaller than <c>minFragCharSize</c>
/// </exception>
protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
{
    if (fragCharSize < minFragCharSize)
    {
        // ArgumentOutOfRangeException derives from ArgumentException, so callers that
        // catch ArgumentException keep working while the offending parameter is named.
        throw new ArgumentOutOfRangeException(
            nameof(fragCharSize),
            "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher.");
    }

    List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
    using (IteratorQueue<WeightedPhraseInfo> queue = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator()))
    {
        WeightedPhraseInfo phraseInfo = null;
        int startOffset = 0; // end of the previously emitted fragment
        while ((phraseInfo = queue.Top()) != null)
        {
            // If the phrase violates the border of the previous fragment, discard it and try the next phrase.
            if (phraseInfo.StartOffset < startOffset)
            {
                queue.RemoveTop();
                continue;
            }

            wpil.Clear();
            int currentPhraseStartOffset = phraseInfo.StartOffset;
            int currentPhraseEndOffset = phraseInfo.EndOffset;
            int spanStart = Math.Max(currentPhraseStartOffset - margin, startOffset);
            int spanEnd = Math.Max(currentPhraseEndOffset, spanStart + fragCharSize);

            // The phrase is always consumed from the queue, even when it is not accepted.
            if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
            {
                wpil.Add(phraseInfo);
            }

            while ((phraseInfo = queue.Top()) != null)
            {
                // Pull phrases until one crosses the current spanEnd.
                if (phraseInfo.EndOffset <= spanEnd)
                {
                    // The end offset advances regardless of whether the phrase is accepted.
                    currentPhraseEndOffset = phraseInfo.EndOffset;
                    if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
                    {
                        wpil.Add(phraseInfo);
                    }
                }
                else
                {
                    break;
                }
            }

            if (wpil.Count == 0)
            {
                // No phrase was accepted for this span; startOffset stays where it was.
                continue;
            }

            int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;

            // Recalculate the start and end positions to "center" the result.
            // matchLen can exceed fragCharSize, so clamp the margin at 0.
            int newMargin = Math.Max(0, (fragCharSize - matchLen) / 2);
            spanStart = currentPhraseStartOffset - newMargin;
            if (spanStart < startOffset)
            {
                spanStart = startOffset;
            }

            // Grow the span to whichever is bigger: the match or the requested fragment size.
            spanEnd = spanStart + Math.Max(matchLen, fragCharSize);
            startOffset = spanEnd;

            // NOTE(review): wpil is reused and cleared on the next iteration, so this
            // presumably relies on Add reading what it needs immediately - confirm.
            fieldFragList.Add(spanStart, spanEnd, wpil);
        }
    }

    return fieldFragList;
}
/// <summary>
/// Creates a single fragment using this instance's <c>m_preTags</c> and
/// <c>m_postTags</c> together with <c>NULL_ENCODER</c>.
/// </summary>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">id of the document to highlight</param>
/// <param name="fieldName">field of the document to highlight</param>
/// <param name="fieldFragList">fragment list to build the fragment from</param>
/// <returns>whatever the seven-argument <c>CreateFragment</c> overload returns for these arguments</returns>
public virtual string CreateFragment(
    IndexReader reader,
    int docId,
    string fieldName,
    FieldFragList fieldFragList)
{
    var openingTags = m_preTags;
    var closingTags = m_postTags;
    return CreateFragment(reader, docId, fieldName, fieldFragList, openingTags, closingTags, NULL_ENCODER);
}