// Verifies that the best fragment never grows past the MaxDocBytesToAnalyze
// limit, even when the document continues with more tokens beyond it.
public virtual void TestMaxSizeHighlightTruncates()
{
    System.String goodWord = "goodtoken";
    System.String[] stopWords = new System.String[]{"stoppedtoken"};

    TermQuery query = new TermQuery(new Term("data", goodWord));
    SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
    Highlighter hg = new Highlighter(fm, new QueryScorer(query));
    hg.SetTextFragmenter(new NullFragmenter());

    // Build a document: one good token followed by 10,000 stop words.
    System.String match = null;
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    sb.Append(goodWord);
    for (int i = 0; i < 10000; i++)
    {
        sb.Append(" ");
        sb.Append(stopWords[0]);
    }

    // Limit analysis to the first 100 bytes; the fragment must respect it.
    hg.SetMaxDocBytesToAnalyze(100);
    match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", sb.ToString());
    Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be less than 100 chars in length");

    // Add another tokenized word to the overall length - but set way beyond
    // the length of text under consideration (after a large slug of stop words + whitespace)
    sb.Append(" ");
    sb.Append(goodWord);
    match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", sb.ToString());
    Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be less than 100 chars in length");
}
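// Verifies that capping MaxDocBytesToAnalyze below the position of the first
// match prevents the highlighter from finding any hits for the record at all.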
public virtual void TestMaxSizeHighlight()
{
    DoSearching("meat");
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    // The match lies past the 30-byte limit, so nothing should be highlighted.
    highlighter.SetMaxDocBytesToAnalyze(30);
    TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
    highlighter.GetBestFragment(tokenStream, texts[0]);
    Assert.IsTrue(numHighlights == 0, "Setting MaxDocBytesToAnalyze should have prevented us from finding matches for this record: " + numHighlights + " found");
}
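// A minimal usage sketch (not part of the original suite) of the pattern both
// tests above exercise, assuming the same Lucene.Net highlighter API as used
// in this file; "longText" is a hypothetical document string:
//
//   TermQuery q = new TermQuery(new Term("data", "goodtoken"));
//   Highlighter h = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
//   h.SetMaxDocBytesToAnalyze(100); // tokens past byte 100 are never analyzed
//   System.String frag = h.GetBestFragment(new StandardAnalyzer(), "data", longText);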