Example no. 1
 /// <summary>
 /// Gets the <see cref="PassageFormatter"/> used to render matched
 /// passages as highlighted snippets for <paramref name="field"/>.
 /// Lazily creates and caches a <see cref="DefaultPassageFormatter"/>
 /// on first use; subclasses can override to customize per field.
 /// </summary>
 protected virtual PassageFormatter GetFormatter(string field)
 {
     // Reuse the cached instance; build the default one only once.
     return defaultFormatter ?? (defaultFormatter = new DefaultPassageFormatter());
 }
 /// <summary>
 /// Creates a helper that forwards <paramref name="analyzer"/> to the base
 /// class and stores <paramref name="formatter"/> for later use.
 /// </summary>
 /// <param name="analyzer">analyzer passed through to the base constructor</param>
 /// <param name="formatter">passage formatter kept in the <c>formatter</c> field;
 /// presumably returned by a <c>GetFormatter</c> override in the enclosing class — confirm</param>
 public PostingsHighlighterAnalyzerAndFormatterHelper(Analyzer analyzer, PassageFormatter formatter)
     : base(analyzer)
 {
     this.formatter = formatter;
 }
Example no. 3
        /// <summary>
        /// Highlights a single <paramref name="field"/> across a batch of documents,
        /// returning a map from docid to the formatted snippet object produced by the
        /// field's <see cref="PassageFormatter"/>. <paramref name="docids"/> must be in
        /// increasing order (asserted below) so that per-segment enums can be reused
        /// across consecutive documents in the same segment. Documents whose stored
        /// content is empty, or whose segment has no terms for the field, get no entry.
        /// </summary>
        /// <param name="field">name of the field to highlight</param>
        /// <param name="contents">stored field text, parallel to <paramref name="docids"/></param>
        /// <param name="bi">break iterator used to split content into candidate passages</param>
        /// <param name="terms">query terms to highlight (extended by one slot if multiterm queries are present)</param>
        /// <param name="docids">target document ids, in increasing order</param>
        /// <param name="leaves">segment leaves of the index reader</param>
        /// <param name="maxPassages">maximum number of passages per document</param>
        /// <param name="query">original query, mined for multiterm (wildcard/fuzzy/etc.) automata</param>
        /// <exception cref="InvalidOperationException">if <see cref="GetFormatter(string)"/> returns null</exception>
        private IDictionary <int, object> HighlightField(string field, string[] contents, BreakIterator bi, BytesRef[] terms, int[] docids, IList <AtomicReaderContext> leaves, int maxPassages, Query query)
        {
            IDictionary <int, object> highlights = new Dictionary <int, object>();

            // Resolve the formatter once for the whole batch; it may be a subclass override.
            PassageFormatter fieldFormatter = GetFormatter(field);

            if (fieldFormatter is null)
            {
                // LUCENENET: Changed from NullPointerException to InvalidOperationException (which isn't caught anywhere outside of tests)
                throw IllegalStateException.Create("PassageFormatter cannot be null");
            }

            // check if we should do any multiterm processing
            Analyzer analyzer = GetIndexAnalyzer(field);

            CharacterRunAutomaton[] automata = Arrays.Empty <CharacterRunAutomaton>();
            if (analyzer != null)
            {
                automata = MultiTermHighlighting.ExtractAutomata(query, field);
            }

            // resize 'terms', where the last term is the multiterm matcher
            if (automata.Length > 0)
            {
                // The extra trailing slot stays null here; it is filled per document
                // below with a "fake" enum driven by the extracted automata.
                BytesRef[] newTerms = new BytesRef[terms.Length + 1];
                System.Array.Copy(terms, 0, newTerms, 0, terms.Length);
                terms = newTerms;
            }

            // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
            // otherwise, we will just advance() existing enums to the new document in the same segment.
            DocsAndPositionsEnum[] postings = null;
            TermsEnum termsEnum             = null;
            int       lastLeaf = -1;

            for (int i = 0; i < docids.Length; i++)
            {
                string content = contents[i];
                if (content.Length == 0)
                {
                    continue; // nothing to do
                }
                bi.SetText(content);
                int doc  = docids[i];
                // Map the global docid to its segment leaf and per-segment reader.
                int leaf = ReaderUtil.SubIndex(doc, leaves);
                AtomicReaderContext subContext = leaves[leaf];
                AtomicReader        r          = subContext.AtomicReader;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(leaf >= lastLeaf);                           // increasing order
                }
                // if the segment has changed, we must initialize new enums.
                if (leaf != lastLeaf)
                {
                    Terms t = r.GetTerms(field);
                    if (t != null)
                    {
                        termsEnum = t.GetEnumerator();
                        postings  = new DocsAndPositionsEnum[terms.Length];
                    }
                    // NOTE(review): if the new segment has no terms for this field,
                    // termsEnum/postings keep their previous segment's values; the
                    // null check below only skips when no segment has had terms yet — confirm intended.
                }
                if (termsEnum is null)
                {
                    continue; // no terms for this field, nothing to do
                }

                // if there are multi-term matches, we have to initialize the "fake" enum for each document
                if (automata.Length > 0)
                {
                    DocsAndPositionsEnum dp = MultiTermHighlighting.GetDocsEnum(analyzer.GetTokenStream(field, content), automata);
                    // Position the fake enum on this document (docids are global; the enum is per-segment).
                    dp.Advance(doc - subContext.DocBase);
                    postings[terms.Length - 1] = dp; // last term is the multiterm matcher
                }

                Passage[] passages = HighlightDoc(field, terms, content.Length, bi, doc - subContext.DocBase, termsEnum, postings, maxPassages);

                if (passages.Length == 0)
                {
                    // no passages were returned, so ask for a default summary
                    passages = GetEmptyHighlight(field, bi, maxPassages);
                }

                if (passages.Length > 0)
                {
                    highlights[doc] = fieldFormatter.Format(passages, content);
                }

                lastLeaf = leaf;
            }

            return(highlights);
        }