GetHighlighter() public method

public GetHighlighter ( Query query, String fieldName, TokenStream stream, IFormatter formatter ) : Highlighter
query Query
fieldName String
stream Lucene.Net.Analysis.TokenStream
formatter IFormatter
return Highlighter
        /// <summary>
        /// Verifies that the best fragment picked by a highlighter built from explicit
        /// <c>WeightedSpanTerm</c>s follows the term weights: with "hello" at weight 10 and
        /// "kennedy" at weight 1 the highlighted "Hello" is expected; once "kennedy" is raised
        /// to weight 50 the highlighted "kennedy" is expected instead.
        /// </summary>
        /// <remarks>
        /// The checks are installed as <c>TestHighlightRunner.TestAction</c>; the runner is defined
        /// outside this view and is presumably what executes the action from <c>Start()</c>.
        /// </remarks>
        public void TestGetBestSingleFragmentWithWeights()
        {
            var runner = new TestHighlightRunner();
            runner.TestAction = () =>
            {
                // Build both weighted terms up front, then attach one position span to each.
                var weightedTerms = new WeightedSpanTerm[]
                {
                    new WeightedSpanTerm(10f, "hello"),
                    new WeightedSpanTerm(1f, "kennedy")
                };
                weightedTerms[0].AddPositionSpans(new List<PositionSpan> {new PositionSpan(0, 0)});
                weightedTerms[1].AddPositionSpans(new List<PositionSpan> {new PositionSpan(14, 14)});

                // Pass 1: "hello" carries the larger weight.
                var highlighter = runner.GetHighlighter(weightedTerms, this);
                var stream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
                highlighter.TextFragmenter = new SimpleFragmenter(2);

                var best = highlighter.GetBestFragment(stream, texts[0]).Trim();
                Assert.IsTrue(best == "<B>Hello</B>",
                              "Failed to find best section using weighted terms. Found: [" + best + "]");

                // Pass 2: boost "kennedy" above "hello" and rebuild the highlighter and stream.
                weightedTerms[1].Weight = 50f;
                stream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
                highlighter = runner.GetHighlighter(weightedTerms, this);
                highlighter.TextFragmenter = new SimpleFragmenter(2);

                best = highlighter.GetBestFragment(stream, texts[0]).Trim();
                Assert.IsTrue(best == "<B>kennedy</B>",
                              "Failed to find best section using weighted terms. Found: " + best);
            };

            runner.Start();
        }
        /// <summary>
        /// Highlights the text "Hi-Speed10 foo" for a series of queries and checks the exact
        /// highlighted output, once with the token stream from <c>getTS2()</c> and once with the
        /// one from <c>GetTS2A()</c> (same cases, with the bigger overlapping token first).
        /// </summary>
        /// <remarks>
        /// Assertions use the <c>Assert.AreEqual(expected, actual)</c> argument order so that a
        /// failure reports the expected and actual strings the right way round, and each
        /// assertion names the query and pass it belongs to.
        /// </remarks>
        public void TestOverlapAnalyzer2()
        {
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
                                    {
                                        String s = "Hi-Speed10 foo";

                                        // Each entry: { query text, expected highlighted output }.
                                        var cases = new[]
                                                        {
                                                            new[] {"foo", "Hi-Speed10 <B>foo</B>"},
                                                            new[] {"10", "Hi-Speed<B>10</B> foo"},
                                                            new[] {"hi", "<B>Hi</B>-Speed10 foo"},
                                                            new[] {"speed", "Hi-<B>Speed</B>10 foo"},
                                                            new[] {"hispeed", "<B>Hi-Speed</B>10 foo"},
                                                            new[] {"hi speed", "<B>Hi-Speed</B>10 foo"}
                                                        };

                                        // Pass 0 uses getTS2(); pass 1 repeats the same cases with
                                        // GetTS2A(), which puts the bigger overlapping token first.
                                        // A fresh stream is requested for every use, as before.
                                        var streamFactories = new Func<TokenStream>[]
                                                                  {
                                                                      () => getTS2(),
                                                                      () => GetTS2A()
                                                                  };

                                        for (int pass = 0; pass < streamFactories.Length; pass++)
                                        {
                                            Func<TokenStream> createStream = streamFactories[pass];

                                            foreach (var testCase in cases)
                                            {
                                                String queryText = testCase[0];
                                                String expected = testCase[1];

                                                Query query =
                                                    new QueryParser(TEST_VERSION, "text", new WhitespaceAnalyzer()).Parse(
                                                        queryText);
                                                Highlighter highlighter = helper.GetHighlighter(query, "text",
                                                                                                createStream(), this);
                                                String result = highlighter.GetBestFragments(createStream(), s, 3, "...");

                                                // expected first, actual second
                                                Assert.AreEqual(expected, result,
                                                                "query [" + queryText + "], token stream pass " + pass);
                                            }
                                        }
                                    };

            helper.Start();
        }
        /// <summary>
        /// Searches for "Kennedy" and highlights every hit three times, once per
        /// <c>Highlighter</c> entry point (<c>GetBestFragment</c> with a token stream,
        /// <c>GetBestFragment</c> with an analyzer, <c>GetBestFragments</c> with an analyzer),
        /// asserting after each pass that <c>numHighlights</c> equals 4.
        /// </summary>
        /// <remarks>
        /// The fixture itself (<c>this</c>) is passed as the formatter; <c>numHighlights</c> is
        /// presumably incremented by that formatter implementation, which is outside this view.
        /// </remarks>
        public void TestGetBestSingleFragment()
        {
            var runner = new TestHighlightRunner();
            runner.TestAction = () =>
            {
                // Shared driver for one pass: reset the counter, build a highlighter for each
                // hit, let the caller run the highlighting call, then check the counter.
                Action<Action<Highlighter, TokenStream, String>> highlightEveryHit = highlightHit =>
                {
                    numHighlights = 0;
                    for (int hitIndex = 0; hitIndex < hits.TotalHits; hitIndex++)
                    {
                        String hitText = searcher.Doc(hits.ScoreDocs[hitIndex].Doc).Get(FIELD_NAME);
                        TokenStream hitStream = analyzer.TokenStream(FIELD_NAME, new StringReader(hitText));
                        Highlighter hitHighlighter = runner.GetHighlighter(query, FIELD_NAME, hitStream, this);

                        highlightHit(hitHighlighter, hitStream, hitText);
                    }
                    Assert.IsTrue(numHighlights == 4,
                                  "Failed to find correct number of highlights " + numHighlights + " found");
                };

                DoSearching("Kennedy");

                // Pass 1: token-stream overload with a SimpleFragmenter(40); result is printed.
                highlightEveryHit((highlighter, stream, text) =>
                {
                    highlighter.TextFragmenter = new SimpleFragmenter(40);
                    String fragment = highlighter.GetBestFragment(stream, text);
                    Console.WriteLine("\t" + fragment);
                });

                // Pass 2: analyzer overload of GetBestFragment; result is discarded.
                highlightEveryHit((highlighter, stream, text) =>
                {
                    highlighter.GetBestFragment(analyzer, FIELD_NAME, text);
                });

                // Pass 3: analyzer overload of GetBestFragments (up to 10); result is discarded.
                highlightEveryHit((highlighter, stream, text) =>
                {
                    highlighter.GetBestFragments(analyzer, FIELD_NAME, text, 10);
                });
            };

            runner.Start();
        }
        /// <summary>
        /// Highlights search results using the multi-term query "JF? or Kenned*" without
        /// rewriting it first, and asserts that no highlights are produced
        /// (<c>numHighlights</c> stays 0).
        /// </summary>
        /// <remarks>
        /// Replaces the fixture's <c>searcher</c> field with a new <c>IndexSearcher</c> over
        /// <c>ramDir</c>. The analyzer, query and hits used here are locals, not the fixture fields.
        /// </remarks>
        public void TestUnRewrittenQuery()
        {
            var runner = new TestHighlightRunner();
            runner.TestAction = () =>
            {
                const int maxFragments = 3;

                numHighlights = 0;
                searcher = new IndexSearcher(ramDir, true);
                Analyzer standardAnalyzer = new StandardAnalyzer(TEST_VERSION);

                // The parsed query is used as-is: Rewrite() is deliberately never called on it.
                Query primitiveQuery =
                    new QueryParser(TEST_VERSION, FIELD_NAME, standardAnalyzer).Parse("JF? or Kenned*");
                Console.WriteLine("Searching with primitive query");
                TopDocs topDocs = searcher.Search(primitiveQuery, null, 1000);

                for (int docIndex = 0; docIndex < topDocs.TotalHits; docIndex++)
                {
                    String storedText = searcher.Doc(topDocs.ScoreDocs[docIndex].Doc).Get(FIELD_NAME);
                    TokenStream stream = standardAnalyzer.TokenStream(FIELD_NAME, new StringReader(storedText));

                    // NOTE(review): the trailing 'false' selects a GetHighlighter overload defined
                    // outside this view; its exact meaning should be confirmed there.
                    Highlighter highlighter = runner.GetHighlighter(primitiveQuery, FIELD_NAME, stream,
                                                                    this, false);
                    highlighter.TextFragmenter = new SimpleFragmenter(40);

                    String highlighted = highlighter.GetBestFragments(stream, storedText, maxFragments, "...");
                    Console.WriteLine(highlighted);
                }

                // A multi-term query that has not been rewritten is expected to highlight nothing.
                Assert.IsTrue(numHighlights == 0,
                              "Failed to find correct number of highlights " + numHighlights + " found");
            };

            runner.Start();
        }
        /// <summary>
        /// Runs a search for a term that should match nothing and asserts that
        /// <c>GetBestFragment</c> returns <c>null</c> for every entry in <c>texts</c>.
        /// </summary>
        public void TestNoFragments()
        {
            var runner = new TestHighlightRunner();
            runner.TestAction = () =>
            {
                DoSearching("AnInvalidQueryWhichShouldYieldNoResults");

                foreach (string candidate in texts)
                {
                    // The same stream instance is handed to both the factory and the highlighter call.
                    TokenStream stream = analyzer.TokenStream(FIELD_NAME, new StringReader(candidate));
                    Highlighter highlighter = runner.GetHighlighter(query, FIELD_NAME, stream, this);

                    String fragment = highlighter.GetBestFragment(stream, candidate);

                    Assert.IsNull(fragment,
                                  "The highlight result should be null for text with no query terms");
                }
            };

            runner.Start();
        }
        /// <summary>
        /// Checks that, with a <c>NullFragmenter</c> and <c>MaxDocCharsToAnalyze</c> set to the
        /// full length of the text, the best fragment still ends with the trailing stop words
        /// "in it" that follow the highlighted term.
        /// </summary>
        public void TestMaxSizeEndHighlight()
        {
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
                {
                    // "in" and "it" are configured as stop words for the analyzers below.
                    var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] {"in", "it"});
                    TermQuery query = new TermQuery(new Term("text", "searchterm"));

                    String text = "this is a text with searchterm in it";
                    SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
                    Highlighter hg = helper.GetHighlighter(query, "text",
                                                           new StandardAnalyzer(TEST_VERSION,
                                                                                stopWords).
                                                               TokenStream("text",
                                                                           new StringReader(text)),
                                                           fm);
                    hg.TextFragmenter = new NullFragmenter();
                    // 36 is exactly text.Length, so the limit sits at the very end of the text.
                    hg.MaxDocCharsToAnalyze = 36;
                    String match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords),
                                                      "text", text);

                    // Fail with a readable assertion instead of a NullReferenceException
                    // when no fragment is produced at all.
                    Assert.IsNotNull(match, "Expected a highlighted fragment but none was returned");

                    // Ordinal comparison: this is an exact character check, not a linguistic one.
                    Assert.IsTrue(match.EndsWith("in it", StringComparison.Ordinal),
                                  "Matched text should contain remainder of text after highlighted query ");
                };
            helper.Start();
        }
        /// <summary>
        /// Builds a very long text ("goodtoken" followed by 10000 copies of a stop word) and
        /// checks that, with <c>MaxDocCharsToAnalyze</c> set to 100 and a <c>NullFragmenter</c>,
        /// the returned fragment is shorter than that limit, both before and after a second
        /// "goodtoken" is appended at the far end of the text.
        /// </summary>
        public void TestMaxSizeHighlightTruncates()
        {
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
                                    {
                                        var goodWord = "goodtoken";
                                        var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] { "stoppedtoken" });
                                        // The set holds exactly one stop word; look it up once
                                        // instead of on every loop iteration.
                                        var stopWord = stopWords.First();

                                        var query = new TermQuery(new Term("data", goodWord));

                                        string match;
                                        StringBuilder sb = new StringBuilder();
                                        sb.Append(goodWord);
                                        for (int i = 0; i < 10000; i++)
                                        {
                                            sb.Append(" ");
                                            sb.Append(stopWord);
                                        }

                                        // Materialise the large text once; it feeds both the token
                                        // stream below and the first GetBestFragment call.
                                        string text = sb.ToString();

                                        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
                                        Highlighter hg = helper.GetHighlighter(query, "data",
                                                                               new StandardAnalyzer(TEST_VERSION,
                                                                                                    stopWords).
                                                                                   TokenStream(
                                                                                       "data",
                                                                                       new StringReader(text)),
                                                                               fm);
                                        hg.TextFragmenter = new NullFragmenter();
                                        hg.MaxDocCharsToAnalyze = 100;
                                        match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data",
                                                                   text);
                                        // Note: the check is strict (fewer than 100 characters).
                                        Assert.IsTrue(match.Length < hg.MaxDocCharsToAnalyze,
                                                      "Matched text should be no more than 100 chars in length ");

                                        // Add another tokenized word to the overall length, placed far
                                        // beyond the portion of the text under consideration (after the
                                        // large slug of stop words and whitespace).
                                        sb.Append(" ");
                                        sb.Append(goodWord);
                                        string extendedText = sb.ToString();
                                        match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data",
                                                                   extendedText);
                                        Assert.IsTrue(match.Length < hg.MaxDocCharsToAnalyze,
                                                      "Matched text should be no more than 100 chars in length ");
                                    };

            helper.Start();

        }
        /// <summary>
        /// Searches for "meat" and highlights <c>texts[0]</c> with
        /// <c>MaxDocCharsToAnalyze</c> set to 30, asserting that no highlights are recorded
        /// (<c>numHighlights</c> stays 0).
        /// </summary>
        /// <remarks>
        /// The failure message names <c>MaxDocCharsToAnalyze</c>, the property this test actually sets.
        /// </remarks>
        public void TestMaxSizeHighlight()
        {
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
                                    {
                                        numHighlights = 0;
                                        DoSearching("meat");
                                        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME,
                                                                                       new StringReader(texts[0]));
                                        Highlighter highlighter = helper.GetHighlighter(query, FIELD_NAME, tokenStream,
                                                                                        this);
                                        // The limit is expected to stop analysis before any match
                                        // in texts[0] is reached.
                                        highlighter.MaxDocCharsToAnalyze = 30;

                                        // Return value is irrelevant here; only the highlight count is checked.
                                        highlighter.GetBestFragment(tokenStream, texts[0]);
                                        Assert.IsTrue(numHighlights == 0,
                                                      "Setting MaxDocCharsToAnalyze should have prevented us from finding matches for this record: "
                                                      + numHighlights + " found");
                                    };

            helper.Start();
        }
        /// <summary>
        /// For every "Kennedy" hit, compares the output of <c>GetBestFragments</c> with the
        /// output of <c>GetBestTextFragments</c>: both must return the same number of fragments,
        /// and each text fragment's <c>ToString()</c> must equal the corresponding string.
        /// </summary>
        public void TestGetTextFragments()
        {
            var runner = new TestHighlightRunner();
            runner.TestAction = () =>
            {
                DoSearching("Kennedy");

                for (int docIndex = 0; docIndex < hits.TotalHits; docIndex++)
                {
                    var storedText = searcher.Doc(hits.ScoreDocs[docIndex].Doc).Get(FIELD_NAME);
                    var stream = analyzer.TokenStream(FIELD_NAME, new StringReader(storedText));

                    var highlighter = runner.GetHighlighter(query, FIELD_NAME, stream, this);
                    highlighter.TextFragmenter = new SimpleFragmenter(20);

                    // String form first, using the stream the highlighter was created with ...
                    var asStrings = highlighter.GetBestFragments(stream, storedText, 10);

                    // ... then the fragment form, over a newly created stream of the same text.
                    stream = analyzer.TokenStream(FIELD_NAME, new StringReader(storedText));
                    var asFragments = highlighter.GetBestTextFragments(stream, storedText, true, 10);

                    Assert.IsTrue(asFragments.Length == asStrings.Length,
                                  "Failed to find correct number of text Fragments: " +
                                  asFragments.Length + " vs " + asStrings.Length);

                    for (int k = 0; k < asStrings.Length; k++)
                    {
                        Console.WriteLine(asFragments[k]);
                        Assert.IsTrue(asFragments[k].ToString().Equals(asStrings[k]),
                                      "Failed to find same text Fragments: " + asFragments[k] + " found");
                    }
                }
            };
            runner.Start();
        }
        /// <summary>
        /// Highlights a sentence with a <c>SynonymAnalyzer</c> that maps "football" to
        /// "soccer,footie" and asserts that a query for "football" highlights "football",
        /// "soccer" and "footie" in the output.
        /// </summary>
        public void TestOverlapAnalyzer()
        {
            var runner = new TestHighlightRunner();
            runner.TestAction = () =>
            {
                var synonymMap = new HashMap<string, string>();
                synonymMap["football"] = "soccer,footie";
                // Local analyzer; the fixture's own analyzer field is not used in this test.
                var synonymAnalyzer = new SynonymAnalyzer(synonymMap);
                var searchKey = "football";

                var content = "football-soccer in the euro 2004 footie competition";
                var parsedQuery = new QueryParser(TEST_VERSION, "bookid", synonymAnalyzer).Parse(searchKey);

                // One stream goes to the highlighter factory ...
                var highlighter = runner.GetHighlighter(
                    parsedQuery, null, synonymAnalyzer.TokenStream(null, new StringReader(content)), this);

                // ... and a second, newly created one is used to fetch up to 3 fragments joined by "...".
                var freshStream = synonymAnalyzer.TokenStream(null, new StringReader(content));
                var actual = highlighter.GetBestFragments(freshStream, content, 3, "...");

                var expected = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
                Assert.IsTrue(expected.Equals(actual),
                              "overlapping analyzer should handle highlights OK, expected:" +
                              expected + " actual:" + actual);
            };

            runner.Start();
        }