/// <summary>
/// Highlights the term "help" (field "data") in the text "help me [54-65]" using a
/// <c>NullFragmenter</c> and checks that the full text is returned with only that term marked up.
/// </summary>
public void TestOffByOne()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        TermQuery query = new TermQuery(new Term("data", "help"));
        Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
        hg.TextFragmenter = new NullFragmenter();

        String match = hg.GetBestFragment(analyzer, "data", "help me [54-65]");

        // Expected value first, actual second, so a failure is reported the right way round.
        Assert.AreEqual("<B>help</B> me [54-65]", match);
    };
    helper.Start();
}
/// <summary>
/// Searches with a span-near query over "john" and "kennedy" wrapped in a <c>FilteredQuery</c>
/// and expects the standard highlighting pass to record exactly two highlights.
/// </summary>
public void TestGetBestFragmentsFilteredQuery()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;

        var rangeFilter = new TermRangeFilter("contents", "john", "john", true, true);
        var nameTerms = new SpanQuery[]
        {
            new SpanTermQuery(new Term("contents", "john")),
            new SpanTermQuery(new Term("contents", "kennedy"))
        };
        var nearJohnKennedy = new SpanNearQuery(nameTerms, 1, true);
        DoSearching(new FilteredQuery(nearJohnKennedy, rangeFilter));

        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);

        // "John" and "Kennedy" are currently highlighted as two separate terms.
        Assert.IsTrue(numHighlights == 2,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Parses the range query <c>[kannedy TO kznnedy]</c> on <c>FIELD_NAME</c> with the scoring
/// boolean rewrite method and expects the standard highlighting pass to record five highlights.
/// </summary>
public void TestGetRangeFragments()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;

        // The rewrite method must be set explicitly so the parser uses a TermRangeQuery
        // rather than RangeFilters.
        var rangeParser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer)
        {
            MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE
        };
        query = rangeParser.Parse(FIELD_NAME + ":[kannedy TO kznnedy]");
        DoSearching(query);

        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);

        Assert.IsTrue(numHighlights == 5,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Searches for the quoted phrase "John Kennedy" and expects the standard highlighting pass
/// to record exactly two highlights.
/// </summary>
public void TestGetBestFragmentsPhrase()
{
    const string phrase = "\"John Kennedy\"";

    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;
        DoSearching(phrase);

        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);

        // "John" and "Kennedy" are currently highlighted as two separate terms.
        Assert.IsTrue(numHighlights == 2,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// For every hit of the query "Kennedy", compares the output of <c>GetBestFragments</c> with
/// that of <c>GetBestTextFragments</c>: both must return the same number of fragments, and each
/// text fragment's string form must equal the corresponding string fragment.
/// </summary>
public void TestGetTextFragments()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        DoSearching("Kennedy");

        for (int hit = 0; hit < hits.TotalHits; hit++)
        {
            var docText = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);

            var firstStream = analyzer.TokenStream(FIELD_NAME, new StringReader(docText));
            var highlighter = helper.GetHighlighter(query, FIELD_NAME, firstStream, this);
            highlighter.TextFragmenter = new SimpleFragmenter(20);
            var stringResults = highlighter.GetBestFragments(firstStream, docText, 10);

            // A fresh token stream is created for the second extraction.
            var secondStream = analyzer.TokenStream(FIELD_NAME, new StringReader(docText));
            var fragmentResults = highlighter.GetBestTextFragments(secondStream, docText, true, 10);

            Assert.IsTrue(fragmentResults.Length == stringResults.Length,
                          "Failed to find correct number of text Fragments: " + fragmentResults.Length
                          + " vs " + stringResults.Length);

            for (int idx = 0; idx < stringResults.Length; idx++)
            {
                var fragment = fragmentResults[idx];
                Console.WriteLine(fragment);
                Assert.IsTrue(fragment.ToString().Equals(stringResults[idx]),
                              "Failed to find same text Fragments: " + fragment + " found");
            }
        }
    };
    helper.Start();
}
/// <summary>
/// Searches with a <c>SpanNotQuery</c> (a span-near of "shot" and "kennedy", excluding "john"),
/// runs the standard highlighting pass once with the runner constructed in
/// <c>TestHighlightRunner.QUERY</c> mode, and expects four highlights.
/// </summary>
public void TestNotSpanSimpleQuery()
{
    // Reset the counter before counting, as the other highlight-counting tests do, so a
    // value left behind by a previously executed test cannot affect the assertion below.
    numHighlights = 0;

    DoSearching(new SpanNotQuery(
                    new SpanNearQuery(
                        new SpanQuery[]
                        {
                            new SpanTermQuery(new Term(FIELD_NAME, "shot")),
                            new SpanTermQuery(new Term(FIELD_NAME, "kennedy"))
                        },
                        3,
                        false),
                    new SpanTermQuery(new Term(FIELD_NAME, "john"))));

    var helper = new TestHighlightRunner(TestHighlightRunner.QUERY);
    helper.TestAction = () => helper.DoStandardHighlights(analyzer, searcher, hits, query, this);
    // Run() is called directly here (not Start()), with the runner built for QUERY mode.
    helper.Run();

    Assert.IsTrue(numHighlights == 4,
                  "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Parses "fred category:people" and checks two scorers against the text
/// "fred is one of the people": a field-specific scorer must highlight only "fred",
/// while a scorer without a field restriction must also highlight "people".
/// The scorer type is chosen from <c>helper.Mode</c>.
/// </summary>
public void TestFieldSpecificHighlighting()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        var docMainText = "fred is one of the people";
        var parser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer);
        var query = parser.Parse("fred category:people");

        // highlighting respects fieldnames used in query
        IScorer fieldSpecificScorer = null;
        if (helper.Mode == TestHighlightRunner.QUERY)
        {
            fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
        }
        else if (helper.Mode == TestHighlightRunner.QUERY_TERM)
        {
            fieldSpecificScorer = new QueryTermScorer(query, "contents");
        }

        var fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer)
        {
            TextFragmenter = new NullFragmenter()
        };
        String result = fieldSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
        // Expected value first, actual second, so a failure is reported the right way round.
        Assert.AreEqual("<B>fred</B> is one of the people", result, "Should match");

        // highlighting does not respect fieldnames used in query
        IScorer fieldInSpecificScorer = null;
        if (helper.Mode == TestHighlightRunner.QUERY)
        {
            fieldInSpecificScorer = new QueryScorer(query, null);
        }
        else if (helper.Mode == TestHighlightRunner.QUERY_TERM)
        {
            fieldInSpecificScorer = new QueryTermScorer(query);
        }

        var fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer)
        {
            TextFragmenter = new NullFragmenter()
        };
        result = fieldInSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
        Assert.AreEqual("<B>fred</B> is one of the <B>people</B>", result, "Should match");

        // NOTE(review): `reader` is closed on every execution of this action - confirm that
        // repeated Close() calls are safe if the runner invokes the action more than once.
        reader.Close();
    };
    helper.Start();
}
/// <summary>
/// Searches for "Kennedy" and highlights every hit three different ways
/// (<c>GetBestFragment</c> over a token stream, <c>GetBestFragment</c> via the analyzer, and
/// <c>GetBestFragments</c> via the analyzer), expecting four highlights after each pass.
/// </summary>
public void TestGetBestSingleFragment()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        DoSearching("Kennedy");

        // Pass 1: best fragment from an explicit token stream with SimpleFragmenter(40).
        numHighlights = 0;
        for (int hit = 0; hit < hits.TotalHits; hit++)
        {
            var docText = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
            var stream = analyzer.TokenStream(FIELD_NAME, new StringReader(docText));
            var highlighter = helper.GetHighlighter(query, FIELD_NAME, stream, this);
            highlighter.TextFragmenter = new SimpleFragmenter(40);
            Console.WriteLine("\t" + highlighter.GetBestFragment(stream, docText));
        }
        Assert.IsTrue(numHighlights == 4,
                      "Failed to find correct number of highlights " + numHighlights + " found");

        // Pass 2: best fragment, letting the highlighter tokenize via the analyzer.
        numHighlights = 0;
        for (int hit = 0; hit < hits.TotalHits; hit++)
        {
            var docText = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
            var highlighter = helper.GetHighlighter(
                query,
                FIELD_NAME,
                analyzer.TokenStream(FIELD_NAME, new StringReader(docText)),
                this);
            highlighter.GetBestFragment(analyzer, FIELD_NAME, docText);
        }
        Assert.IsTrue(numHighlights == 4,
                      "Failed to find correct number of highlights " + numHighlights + " found");

        // Pass 3: up to 10 best fragments, again tokenizing via the analyzer.
        numHighlights = 0;
        for (int hit = 0; hit < hits.TotalHits; hit++)
        {
            var docText = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
            var highlighter = helper.GetHighlighter(
                query,
                FIELD_NAME,
                analyzer.TokenStream(FIELD_NAME, new StringReader(docText)),
                this);
            highlighter.GetBestFragments(analyzer, FIELD_NAME, docText, 10);
        }
        Assert.IsTrue(numHighlights == 4,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Searches with the multi-term query "JF? or Kenned*" without rewriting it first and
/// expects the highlighter to record zero highlights.
/// </summary>
public void TestUnRewrittenQuery()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;

        searcher = new IndexSearcher(ramDir, true);
        Analyzer standardAnalyzer = new StandardAnalyzer(TEST_VERSION);
        Query unrewritten = new QueryParser(TEST_VERSION, FIELD_NAME, standardAnalyzer).Parse("JF? or Kenned*");
        Console.WriteLine("Searching with primitive query");

        // The query is deliberately NOT rewritten (no query.Rewrite(reader) call) before
        // it is handed to the searcher and the highlighter.
        TopDocs topDocs = searcher.Search(unrewritten, null, 1000);

        const int maxNumFragmentsRequired = 3;
        for (int hit = 0; hit < topDocs.TotalHits; hit++)
        {
            String docText = searcher.Doc(topDocs.ScoreDocs[hit].Doc).Get(FIELD_NAME);
            TokenStream stream = standardAnalyzer.TokenStream(FIELD_NAME, new StringReader(docText));

            Highlighter highlighter = helper.GetHighlighter(unrewritten, FIELD_NAME, stream, this, false);
            highlighter.TextFragmenter = new SimpleFragmenter(40);

            Console.WriteLine(highlighter.GetBestFragments(stream, docText, maxNumFragmentsRequired, "..."));
        }

        // A multi-term query that has not been rewritten is expected to yield no highlights.
        Assert.IsTrue(numHighlights == 0,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Searches for a term that is not expected to match anything and checks that
/// <c>GetBestFragment</c> returns null for every entry in <c>texts</c>.
/// </summary>
public void TestNoFragments()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        DoSearching("AnInvalidQueryWhichShouldYieldNoResults");

        foreach (string sampleText in texts)
        {
            var stream = analyzer.TokenStream(FIELD_NAME, new StringReader(sampleText));
            var highlighter = helper.GetHighlighter(query, FIELD_NAME, stream, this);

            var bestFragment = highlighter.GetBestFragment(stream, sampleText);

            Assert.IsNull(bestFragment,
                          "The highlight result should be null for text with no query terms");
        }
    };
    helper.Start();
}
/// <summary>
/// Highlights "searchterm" in "this is a text with searchterm in it" with "in" and "it"
/// as stop words and <c>MaxDocCharsToAnalyze</c> set to 36, and checks that the result
/// still ends with "in it".
/// </summary>
public void TestMaxSizeEndHighlight()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        const string fieldName = "text";
        var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] {"in", "it"});
        var query = new TermQuery(new Term(fieldName, "searchterm"));
        var text = "this is a text with searchterm in it";
        var formatter = new SimpleHTMLFormatter();

        var setupStream = new StandardAnalyzer(TEST_VERSION, stopWords)
            .TokenStream(fieldName, new StringReader(text));
        var hg = helper.GetHighlighter(query, fieldName, setupStream, formatter);
        hg.TextFragmenter = new NullFragmenter();
        hg.MaxDocCharsToAnalyze = 36;

        var match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), fieldName, text);

        Assert.IsTrue(match.EndsWith("in it"),
                      "Matched text should contain remainder of text after highlighted query ");
    };
    helper.Start();
}
/// <summary>
/// Builds a long text consisting of one matching token followed by 10000 repetitions of a
/// stop word, highlights it with <c>MaxDocCharsToAnalyze</c> set to 100, and checks that the
/// returned fragment is shorter than that limit - both before and after a second matching
/// token is appended at the very end of the text.
/// </summary>
public void TestMaxSizeHighlightTruncates()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        var goodWord = "goodtoken";
        var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] { "stoppedtoken" });
        // The set holds only one stop word; fetch it once rather than on each of the
        // 10000 loop iterations.
        var stopWord = stopWords.First();

        var query = new TermQuery(new Term("data", goodWord));

        string match;
        StringBuilder sb = new StringBuilder();
        sb.Append(goodWord);
        for (int i = 0; i < 10000; i++)
        {
            sb.Append(" ");
            sb.Append(stopWord);
        }

        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = helper.GetHighlighter(
            query,
            "data",
            new StandardAnalyzer(TEST_VERSION, stopWords).TokenStream("data", new StringReader(sb.ToString())),
            fm);
        hg.TextFragmenter = new NullFragmenter();
        hg.MaxDocCharsToAnalyze = 100;

        match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data", sb.ToString());
        Assert.IsTrue(match.Length < hg.MaxDocCharsToAnalyze,
                      "Matched text should be no more than 100 chars in length ");

        // Add another tokenized word to the overall length - but placed way beyond the
        // length of text under consideration (after a large slug of stop words + whitespace).
        sb.Append(" ");
        sb.Append(goodWord);

        match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data", sb.ToString());
        Assert.IsTrue(match.Length < hg.MaxDocCharsToAnalyze,
                      "Matched text should be no more than 100 chars in length ");
    };
    helper.Start();
}
/// <summary>
/// Searches for "meat", limits the highlighter to the first 30 characters of
/// <c>texts[0]</c> via <c>MaxDocCharsToAnalyze</c>, and expects no highlights to be recorded.
/// </summary>
public void TestMaxSizeHighlight()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;
        DoSearching("meat");

        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
        Highlighter highlighter = helper.GetHighlighter(query, FIELD_NAME, tokenStream, this);
        highlighter.MaxDocCharsToAnalyze = 30;

        highlighter.GetBestFragment(tokenStream, texts[0]);

        // The message names the property that is actually set above (MaxDocCharsToAnalyze).
        Assert.IsTrue(numHighlights == 0,
                      "Setting MaxDocCharsToAnalyze should have prevented us from finding matches for this record: "
                      + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Searches with a <c>PhraseQuery</c> for "john" "kennedy" wrapped in a <c>FilteredQuery</c>
/// and expects the standard highlighting pass to record exactly two highlights.
/// </summary>
public void TestGetBestFragmentsFilteredPhraseQuery()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;

        var rangeFilter = new TermRangeFilter("contents", "john", "john", true, true);

        var phrase = new PhraseQuery();
        phrase.Add(new Term("contents", "john"));
        phrase.Add(new Term("contents", "kennedy"));

        DoSearching(new FilteredQuery(phrase, rangeFilter));

        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);

        // "John" and "Kennedy" are currently highlighted as two separate terms.
        Assert.IsTrue(numHighlights == 2,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Highlights the text "Hi-Speed10 foo" for a series of queries and checks the exact
/// marked-up output for each. Every query is checked twice: first with token streams from
/// <c>getTS2()</c>, then with token streams from <c>GetTS2A()</c>.
/// </summary>
public void TestOverlapAnalyzer2()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        String s = "Hi-Speed10 foo";

        // Each row is { query text, expected highlighted output }.
        var cases = new[]
        {
            new[] { "foo", "Hi-Speed10 <B>foo</B>" },
            new[] { "10", "Hi-Speed<B>10</B> foo" },
            new[] { "hi", "<B>Hi</B>-Speed10 foo" },
            new[] { "speed", "Hi-<B>Speed</B>10 foo" },
            new[] { "hispeed", "<B>Hi-Speed</B>10 foo" },
            new[] { "hi speed", "<B>Hi-Speed</B>10 foo" }
        };

        // The second source runs the same tests, just with the bigger overlapping token
        // put first.
        var tokenStreamSources = new Func<TokenStream>[]
        {
            () => getTS2(),
            () => GetTS2A()
        };

        foreach (var createTokenStream in tokenStreamSources)
        {
            foreach (var testCase in cases)
            {
                var queryText = testCase[0];
                var expected = testCase[1];

                Query query = new QueryParser(TEST_VERSION, "text", new WhitespaceAnalyzer()).Parse(queryText);
                // One stream is handed to the highlighter factory and a fresh one is used
                // for the actual fragment extraction, as in the original stanzas.
                Highlighter highlighter = helper.GetHighlighter(query, "text", createTokenStream(), this);
                String result = highlighter.GetBestFragments(createTokenStream(), s, 3, "...");

                // Expected value first so a failure is reported the right way round.
                Assert.AreEqual(expected, result, "Unexpected highlighting for query '" + queryText + "'");
            }
        }
    };
    helper.Start();
}
/// <summary>
/// Searches for "JFK OR Kennedy" and expects the standard highlighting pass to record
/// five highlights.
/// </summary>
public void TestGetBestFragmentsWithOr()
{
    const string orQuery = "JFK OR Kennedy";

    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;
        DoSearching(orQuery);

        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);

        Assert.IsTrue(numHighlights == 5,
                      "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Searches with a boolean OR of two span-near queries ("wordx"/"wordy" and
/// "wordy"/"wordc"), runs the standard highlighting pass once with the runner constructed in
/// <c>TestHighlightRunner.QUERY</c> mode, and expects seven highlights.
/// </summary>
public void TestSpanHighlighting()
{
    // Reset the counter before counting, as the other highlight-counting tests do, so a
    // value left behind by a previously executed test cannot affect the assertion below.
    numHighlights = 0;

    Query query1 = new SpanNearQuery(
        new SpanQuery[]
        {
            new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
            new SpanTermQuery(new Term(FIELD_NAME, "wordy"))
        },
        1,
        false);
    Query query2 = new SpanNearQuery(
        new SpanQuery[]
        {
            new SpanTermQuery(new Term(FIELD_NAME, "wordy")),
            new SpanTermQuery(new Term(FIELD_NAME, "wordc"))
        },
        1,
        false);

    BooleanQuery bquery = new BooleanQuery();
    bquery.Add(query1, Occur.SHOULD);
    bquery.Add(query2, Occur.SHOULD);
    DoSearching(bquery);

    var helper = new TestHighlightRunner(TestHighlightRunner.QUERY);
    helper.TestAction = () => helper.DoStandardHighlights(analyzer, searcher, hits, query, this);
    // Run() is called directly here (not Start()), with the runner built for QUERY mode.
    helper.Run();

    Assert.IsTrue(numHighlights == 7,
                  "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Highlights <c>texts[0]</c> using two explicitly weighted terms. With "hello" weighted 10
/// and "kennedy" weighted 1 the best fragment must be the highlighted "Hello"; after raising
/// the weight of "kennedy" to 50 the best fragment must be the highlighted "kennedy".
/// </summary>
public void TestGetBestSingleFragmentWithWeights()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        var helloTerm = new WeightedSpanTerm(10f, "hello");
        helloTerm.AddPositionSpans(new List<PositionSpan> {new PositionSpan(0, 0)});

        var kennedyTerm = new WeightedSpanTerm(1f, "kennedy");
        kennedyTerm.AddPositionSpans(new List<PositionSpan> {new PositionSpan(14, 14)});

        var weightedTerms = new[] {helloTerm, kennedyTerm};

        // First round: "hello" carries the larger weight.
        var highlighter = helper.GetHighlighter(weightedTerms, this);
        var stream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
        highlighter.TextFragmenter = new SimpleFragmenter(2);

        var best = highlighter.GetBestFragment(stream, texts[0]).Trim();
        Assert.IsTrue(string.Equals("<B>Hello</B>", best),
                      "Failed to find best section using weighted terms. Found: [" + best + "]");

        // Second round: readjust the weights so that "kennedy" dominates.
        kennedyTerm.Weight = 50f;
        stream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
        highlighter = helper.GetHighlighter(weightedTerms, this);
        highlighter.TextFragmenter = new SimpleFragmenter(2);

        best = highlighter.GetBestFragment(stream, texts[0]).Trim();
        Assert.IsTrue(string.Equals("<B>kennedy</B>", best),
                      "Failed to find best section using weighted terms. Found: " + best);
    };
    helper.Start();
}
/// <summary>
/// Uses a <c>SynonymAnalyzer</c> that maps "football" to "soccer,footie", queries for
/// "football", and checks that "football", "soccer" and "footie" are all highlighted in
/// the sample sentence.
/// </summary>
public void TestOverlapAnalyzer()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        var synonymMap = new HashMap<string, string>();
        synonymMap["football"] = "soccer,footie";
        var synonymAnalyzer = new SynonymAnalyzer(synonymMap);

        var sentence = "football-soccer in the euro 2004 footie competition";
        var expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";

        var footballQuery = new QueryParser(TEST_VERSION, "bookid", synonymAnalyzer).Parse("football");

        var setupStream = synonymAnalyzer.TokenStream(null, new StringReader(sentence));
        var highlighter = helper.GetHighlighter(footballQuery, null, setupStream, this);

        // Get the 3 best fragments, separated with "...", from a fresh token stream.
        var extractionStream = synonymAnalyzer.TokenStream(null, new StringReader(sentence));
        var actual = highlighter.GetBestFragments(extractionStream, sentence, 3, "...");

        Assert.IsTrue(string.Equals(expectedResult, actual),
                      "overlapping analyzer should handle highlights OK, expected:" + expectedResult
                      + " actual:" + actual);
    };
    helper.Start();
}