/// <summary>
/// Builds an HTML excerpt of <paramref name="text"/> made of up to three
/// highlighted fragments (about 120 characters each), every fragment wrapped
/// in its own paragraph element.
/// </summary>
/// <param name="text">Plain text to highlight against the query returned by <c>GetQuery()</c>.</param>
/// <returns>
/// The highlighted paragraphs, or, when the highlighter yields no fragment,
/// the first 120 characters of <paramref name="text"/> followed by "..."
/// (the whole text when it is 120 characters or shorter).
/// </returns>
public string HighlightContent(string text)
{
    const string openTag = "<span style='color:maroon; font-weight:bold;'>";
    const string closeTag = "</span>";

    QueryScorer scorer = new QueryScorer(GetQuery());
    Formatter formatter = new SimpleHTMLFormatter(openTag, closeTag);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(new SimpleFragmenter(120));

    TokenStream stream = new StandardAnalyzer().TokenStream("Content", new StringReader(text));
    var fragments = highlighter.GetBestFragments(stream, text, 3);

    if (fragments == null || fragments.Length == 0)
    {
        return text.Length > 120 ? text.Substring(0, 120) + "..." : text;
    }

    var highlighted = new System.Text.StringBuilder();
    foreach (var fragment in fragments)
    {
        // The fragment already carries the highlight markup, so it has to be
        // stripped before comparing against the raw text; otherwise the
        // start/end checks can never match and every fragment would get an
        // ellipsis on both sides.
        string plain = fragment.Replace(openTag, "").Replace(closeTag, "");
        bool atStart = text.StartsWith(plain, StringComparison.Ordinal);
        bool atEnd = text.EndsWith(plain, StringComparison.Ordinal);

        highlighted.Append("<p>");
        if (!atStart)
        {
            highlighted.Append(" ... ");
        }
        highlighted.Append(fragment);
        if (!atEnd)
        {
            highlighted.Append(" ... ");
        }
        highlighted.Append("</p>");
    }

    return highlighted.ToString();
}
/// <summary>
/// Runs <paramref name="query"/> against the "contents" field and returns the
/// matching documents together with a highlighted excerpt.
/// </summary>
/// <param name="query">Query text in Lucene query-parser syntax.</param>
/// <param name="maxResults">Maximum number of hits requested from the searcher.</param>
/// <returns>One <c>Hit</c> per returned score document, in the order the searcher returned them.</returns>
public IEnumerable<Hit> Search(string query, int maxResults)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "contents", analyzer);
    Query q = qp.Parse(query);

    TopDocs top = searcher.Search(q, maxResults);

    // totalHits counts every match in the index, while scoreDocs is capped at
    // maxResults. Iterating up to totalHits runs past the end of scoreDocs
    // whenever there are more matches than requested.
    var hits = top.scoreDocs;
    List<Hit> result = new List<Hit>(hits.Length);

    // The scorer and highlighter depend only on the query, so build them once.
    var scorer = new QueryScorer(q, searcher.GetIndexReader(), "contents");
    var highlighter = new Highlighter(scorer);

    foreach (var hit in hits)
    {
        var doc = searcher.Doc(hit.doc);
        string contents = doc.Get("contents");

        result.Add(new Hit()
        {
            Relevance = hit.score,
            Title = doc.Get("title"),
            Url = doc.Get("path"),
            Excerpt = highlighter.GetBestFragment(analyzer, "contents", contents)
        });
    }

    return result;
}
/// <summary>
/// Produces the highlighted fragments of <paramref name="value"/> for an
/// already-parsed query.
/// </summary>
/// <param name="value">Field text to highlight.</param>
/// <param name="searcher">Searcher whose index reader is used to rewrite the query.</param>
/// <param name="highlightField">Field name passed to the highlight analyzer.</param>
/// <param name="luceneQuery">Query whose terms are highlighted.</param>
/// <returns>Up to <c>MaxNumHighlights</c> fragments joined with <c>Separator</c>.</returns>
public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Query luceneQuery)
{
    // Rewrite against the reader before handing the query to the scorer.
    var rewrittenQuery = luceneQuery.Rewrite(searcher.GetIndexReader());
    var fragmentScorer = new QueryScorer(rewrittenQuery);

    var fragmentHighlighter = new Highlighter(HighlightFormatter, fragmentScorer);

    var valueReader = new StringReader(value);
    var tokens = HighlightAnalyzer.TokenStream(highlightField, valueReader);

    return fragmentHighlighter.GetBestFragments(tokens, value, MaxNumHighlights, Separator);
}
/// <summary>
/// Parses <paramref name="luceneRawQuery"/> for <paramref name="highlightField"/>
/// and returns the highlighted fragments of <paramref name="value"/>.
/// </summary>
/// <param name="value">Field text to highlight.</param>
/// <param name="highlightField">Field used for the query parser and the highlight analyzer.</param>
/// <param name="searcher">Searcher used to rewrite the parsed query.</param>
/// <param name="luceneRawQuery">Unparsed query text.</param>
/// <returns>Up to <c>MaxNumHighlights</c> fragments joined with <c>Separator</c>.</returns>
public string GetHighlight(string value, string highlightField, Searcher searcher, string luceneRawQuery)
{
    var parsedQuery = GetQueryParser(highlightField).Parse(luceneRawQuery);

    // The scorer works on the rewritten form of the parsed query.
    var rewrittenQuery = searcher.Rewrite(parsedQuery);
    var fragmentScorer = new QueryScorer(rewrittenQuery);
    var fragmentHighlighter = new Highlighter(HighlightFormatter, fragmentScorer);

    var valueReader = new StringReader(value);
    var tokens = HighlightAnalyzer.TokenStream(highlightField, valueReader);

    return fragmentHighlighter.GetBestFragments(tokens, value, MaxNumHighlights, Separator);
}
/// <summary>
/// Highlights the query terms inside a title without splitting it into
/// fragments.
/// </summary>
/// <param name="text">Title text to highlight against the query returned by <c>GetQuery()</c>.</param>
/// <returns>
/// The highlighted title, or <paramref name="text"/> unchanged when the
/// highlighter returns null.
/// </returns>
public string HighlightTitle(string text)
{
    QueryScorer queryScorer = new QueryScorer(GetQuery());
    Formatter spanFormatter = new SimpleHTMLFormatter(
        "<span style='color:maroon; font-weight:bold;'>",
        "</span>");

    Highlighter titleHighlighter = new Highlighter(spanFormatter, queryScorer);
    // NullFragmenter keeps the whole title as a single fragment.
    titleHighlighter.SetTextFragmenter(new NullFragmenter());

    StandardAnalyzer titleAnalyzer = new StandardAnalyzer();
    TokenStream tokens = titleAnalyzer.TokenStream("Title", new StringReader(text));

    string best = titleHighlighter.GetBestFragment(tokens, text);
    if (best == null)
    {
        return text;
    }

    return best;
}
/// <summary>
/// Searches the "text" field of the index at <c>indexPath</c> and returns up
/// to 100 results, each with a highlighted preview.
/// </summary>
/// <param name="query">Query text in Lucene query-parser syntax.</param>
/// <returns>A fully materialized list of results; it does not depend on the index staying open.</returns>
public IEnumerable<SearchResult> Search(string query)
{
    Analyzer analyzer = new SnowballAnalyzer("English");
    QueryParser parser = new QueryParser("text", analyzer);
    Query luceneQuery = parser.Parse(query);

    Directory directory = FSDirectory.GetDirectory(indexPath);
    IndexSearcher searcher = null;
    try
    {
        searcher = new IndexSearcher(directory);

        QueryScorer queryScorer = new QueryScorer(luceneQuery);
        Highlighter highlighter = new Highlighter(queryScorer);

        TopDocs topDocs = searcher.Search(luceneQuery, 100);

        var searchResults = new List<SearchResult>();
        foreach (ScoreDoc scoreDoc in topDocs.scoreDocs)
        {
            Document doc = searcher.Doc(scoreDoc.doc);
            searchResults.Add(new SearchResult
            {
                Path = doc.Get("path"),
                Score = scoreDoc.score,
                Title = doc.Get("title"),
                Preview = highlighter.GetBestFragment(analyzer, "text", doc.Get("text"))
            });
        }

        return searchResults;
    }
    finally
    {
        // The searcher and directory are opened on every call; release them
        // even when the search or highlighting throws, so index handles are
        // not leaked.
        if (searcher != null)
        {
            searcher.Close();
        }
        directory.Close();
    }
}
/// <summary>
/// Runs a multi-field query and optionally replaces selected field values in
/// each result with a highlighted version.
/// </summary>
/// <param name="luceneQuery">Query text; a null, empty or whitespace-only value yields an empty result list.</param>
/// <param name="maxResults">Maximum number of hits requested from the searcher.</param>
/// <param name="highlightOpenTag">Markup inserted before each highlighted term.</param>
/// <param name="highlightCloseTag">Markup inserted after each highlighted term.</param>
/// <param name="fieldsToHighlight">Names of the result fields to highlight.</param>
/// <returns>
/// The matching results. Highlighting is applied only when both tags are
/// non-empty and at least one field name is supplied.
/// </returns>
public IEnumerable<SearchResult> Search(string luceneQuery, int maxResults = 500, string highlightOpenTag = null, string highlightCloseTag = null, params string[] fieldsToHighlight)
{
    var results = new List<SearchResult>();
    if (String.IsNullOrWhiteSpace(luceneQuery))
    {
        return results;
    }

    // One analyzer serves both the parser and the highlighter token streams;
    // there is no need to build a new one per field per result.
    var analyzer = new StandardAnalyzer(LuceneVersion);
    var parser = new MultiFieldQueryParser(LuceneVersion, searchFields, analyzer);
    Query query = parser.Parse(luceneQuery);

    TopDocs topDocs = searcher.Search(query, maxResults);
    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
    {
        Document document = reader.Document(scoreDoc.doc);
        results.Add(new SearchResult(document, scoreDoc.score));
    }

    // A params array can be passed explicitly as null, so check it before
    // reading Length.
    bool highlightRequested =
        !String.IsNullOrEmpty(highlightOpenTag) &&
        !String.IsNullOrEmpty(highlightCloseTag) &&
        fieldsToHighlight != null &&
        fieldsToHighlight.Length > 0;

    if (!highlightRequested)
    {
        return results;
    }

    var scorer = new QueryScorer(query);
    var formatter = new SimpleHTMLFormatter(highlightOpenTag, highlightCloseTag);
    var highlighter = new Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(new SimpleFragmenter());

    foreach (SearchResult result in results)
    {
        foreach (string highlightField in fieldsToHighlight)
        {
            if (!result.Fields.ContainsKey(highlightField))
            {
                continue;
            }

            string fieldValue = result[highlightField];
            // Nothing to highlight in a null or empty value; a null would
            // also make the StringReader constructor throw.
            if (String.IsNullOrEmpty(fieldValue))
            {
                continue;
            }

            TokenStream stream = analyzer.TokenStream(highlightField, new StringReader(fieldValue));
            string highlightedFieldValue = highlighter.GetBestFragments(stream, fieldValue, 500, "...");

            // Keep the original value when the highlighter found no match.
            if (!String.IsNullOrWhiteSpace(highlightedFieldValue))
            {
                result.Fields[highlightField] = highlightedFieldValue;
            }
        }
    }

    return results;
}
/// <summary>
/// Builds a preview of <paramref name="text"/> from up to three highlighted
/// fragments joined with "...".
/// </summary>
/// <param name="q">Query whose terms are emphasized.</param>
/// <param name="text">Body text to excerpt.</param>
/// <returns>The joined fragments as returned by the highlighter.</returns>
private string GeneratePreviewText(Query q, string text)
{
    var queryScorer = new QueryScorer(q);
    var emphasis = new SimpleHTMLFormatter("<em>", "</em>");

    var previewHighlighter = new Highlighter(emphasis, queryScorer);
    previewHighlighter.SetTextFragmenter(new SimpleFragmenter(250));

    var bodyAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
    var tokens = bodyAnalyzer.TokenStream("bodyText", new StringReader(text));

    string preview = previewHighlighter.GetBestFragments(tokens, text, 3, "...");
    return preview;
}
/// <summary>
/// Executes <paramref name="query"/> against the index at <c>_indexPath</c>
/// and collects one page of results, optionally with a content snippet read
/// from the cached file of each document.
/// </summary>
/// <param name="query">Query to execute.</param>
/// <param name="cachePath">Base path prepended to the relative cache file name of each document.</param>
/// <param name="offset">Index of the first hit to include in the page.</param>
/// <param name="resultAmount">Maximum number of hits on the page.</param>
/// <param name="includeContentSnippets">Whether to read each cached file and extract a highlighted snippet.</param>
/// <param name="limit">Maximum number of hits requested from the searcher.</param>
/// <returns>
/// The results for the requested page; <c>NumResults</c> is the number of
/// hits the searcher returned (at most <paramref name="limit"/>).
/// </returns>
private SearchResults Query(Query query, string cachePath, int offset, int resultAmount, bool includeContentSnippets, int limit)
{
    SearchResults results = new SearchResults();

    Lucene.Net.Store.FSDirectory directory = null;
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try
    {
        directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(_indexPath));
        reader = IndexReader.Open(directory, true);
        searcher = new IndexSearcher(reader);

        // Request all results up to the page we actually need (this is quick)
        TopDocs topDocs = searcher.Search(query, limit);
        ScoreDoc[] hits = topDocs.scoreDocs;

        // Save num results
        results.NumResults = hits.Length;

        // Only loop through the hits that should be on the page
        for (int i = offset; i < hits.Length && i < offset + resultAmount; i++)
        {
            int docId = hits[i].doc;
            Document doc = searcher.Doc(docId);

            if (!includeContentSnippets)
            {
                results.AddLuceneDocument(doc);
                continue;
            }

            // Read the whole file from the cache to find the content snippet.
            string filepath = CacheManager.GetRelativeCacheFileName(doc.Get("uri"), "GET");
            string documentContent = Utils.ReadFileAsString(cachePath + filepath);

            // Remove unusable stuff.
            documentContent = HtmlUtils.RemoveHead(documentContent);
            documentContent = HtmlUtils.ExtractText(documentContent);

            // Find (and highlight) content snippets
            QueryScorer scorer = new QueryScorer(query);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new SentenceFragmenter());
            TokenStream stream = _analyzer.TokenStream("content", new StringReader(documentContent));

            // Get 1 fragment
            string contentSnippet = "";
            try
            {
                string[] fragments = highlighter.GetBestFragments(stream, documentContent, 1);
                if (fragments.Length > 0)
                {
                    contentSnippet = HtmlUtils.StripTagsCharArray(fragments[0], false);

                    // If the content snippet ends in the middle of a sentence, append "...".
                    // An empty snippet has no last character to inspect and is left as is.
                    if (contentSnippet.Length > 0)
                    {
                        char last = contentSnippet[contentSnippet.Length - 1];
                        if (last != '.' && last != '!' && last != '?')
                        {
                            contentSnippet += "...";
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: a snippet that cannot be produced must not fail
                // the whole search; the document is still added with whatever
                // snippet text was produced before the failure.
            }

            results.AddLuceneDocument(doc, contentSnippet);
        }
    }
    finally
    {
        // Release index resources even when searching or snippet extraction
        // throws. The searcher was built on an externally opened reader, so
        // the reader and directory are closed explicitly as well.
        if (searcher != null)
        {
            searcher.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (directory != null)
        {
            directory.Close();
        }
    }

    return results;
}