/// <summary> /// Returns the <see cref="PassageFormatter"/> to use for /// formatting passages into highlighted snippets. This /// returns a new <see cref="PassageFormatter"/> by default; /// subclasses can override to customize. /// </summary> protected virtual PassageFormatter GetFormatter(string field) { if (defaultFormatter is null) { defaultFormatter = new DefaultPassageFormatter(); } return(defaultFormatter); }
public PostingsHighlighterAnalyzerAndFormatterHelper(Analyzer analyzer, PassageFormatter formatter) : base(analyzer) { this.formatter = formatter; }
private IDictionary <int, object> HighlightField(string field, string[] contents, BreakIterator bi, BytesRef[] terms, int[] docids, IList <AtomicReaderContext> leaves, int maxPassages, Query query) { IDictionary <int, object> highlights = new Dictionary <int, object>(); PassageFormatter fieldFormatter = GetFormatter(field); if (fieldFormatter is null) { // LUCENENET: Changed from NullPointerException to InvalidOperationException (which isn't caught anywhere outside of tests) throw IllegalStateException.Create("PassageFormatter cannot be null"); } // check if we should do any multiterm processing Analyzer analyzer = GetIndexAnalyzer(field); CharacterRunAutomaton[] automata = Arrays.Empty <CharacterRunAutomaton>(); if (analyzer != null) { automata = MultiTermHighlighting.ExtractAutomata(query, field); } // resize 'terms', where the last term is the multiterm matcher if (automata.Length > 0) { BytesRef[] newTerms = new BytesRef[terms.Length + 1]; System.Array.Copy(terms, 0, newTerms, 0, terms.Length); terms = newTerms; } // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes // otherwise, we will just advance() existing enums to the new document in the same segment. DocsAndPositionsEnum[] postings = null; TermsEnum termsEnum = null; int lastLeaf = -1; for (int i = 0; i < docids.Length; i++) { string content = contents[i]; if (content.Length == 0) { continue; // nothing to do } bi.SetText(content); int doc = docids[i]; int leaf = ReaderUtil.SubIndex(doc, leaves); AtomicReaderContext subContext = leaves[leaf]; AtomicReader r = subContext.AtomicReader; if (Debugging.AssertsEnabled) { Debugging.Assert(leaf >= lastLeaf); // increasing order } // if the segment has changed, we must initialize new enums. if (leaf != lastLeaf) { Terms t = r.GetTerms(field); if (t != null) { termsEnum = t.GetEnumerator(); postings = new DocsAndPositionsEnum[terms.Length]; } } if (termsEnum is null) { continue; // no terms for this field, nothing to do } // if there are multi-term matches, we have to initialize the "fake" enum for each document if (automata.Length > 0) { DocsAndPositionsEnum dp = MultiTermHighlighting.GetDocsEnum(analyzer.GetTokenStream(field, content), automata); dp.Advance(doc - subContext.DocBase); postings[terms.Length - 1] = dp; // last term is the multiterm matcher } Passage[] passages = HighlightDoc(field, terms, content.Length, bi, doc - subContext.DocBase, termsEnum, postings, maxPassages); if (passages.Length == 0) { // no passages were returned, so ask for a default summary passages = GetEmptyHighlight(field, bi, maxPassages); } if (passages.Length > 0) { highlights[doc] = fieldFormatter.Format(passages, content); } lastLeaf = leaf; } return(highlights); }