/// <summary>
/// Applies HTML highlighting (&lt;u&gt;&lt;b&gt;…&lt;/b&gt;&lt;/u&gt;) to the title, content and
/// author fields of the first 10 hit documents, in hit order. A field is only
/// replaced when a non-empty highlighted fragment was produced for it.
/// </summary>
/// <param name="documents">Documents aligned index-for-index with <paramref name="hits"/>.</param>
/// <param name="hits">Search hits whose doc ids are used to fetch field highlights.</param>
/// <param name="query">Query whose terms are highlighted.</param>
/// <param name="searcher">Searcher used by HighlightText to load stored fields.</param>
/// <param name="analyzer">Analyzer used by HighlightText to tokenize field text.</param>
private static void HighlightDocuments(List<Document> documents, TopDocs hits, Query query, IndexSearcher searcher, Analyzer analyzer)
{
    var htmlFormatter = new SimpleHTMLFormatter("<u><b>", "</b></u>");
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

    for (int i = 0; i < hits.ScoreDocs.Length && i < 10; i++)
    {
        int id = hits.ScoreDocs[i].Doc;

        string titleHighlightedText = HighlightText(id, "title", highlighter, searcher, analyzer);
        string contentHighlightedText = HighlightText(id, "content", highlighter, searcher, analyzer);
        string authorHighlightedText = HighlightText(id, "author", highlighter, searcher, analyzer);

        // Fix: the original compared with != "" which also accepted a null
        // fragment and would have written null into the document field.
        if (!string.IsNullOrEmpty(titleHighlightedText))
        {
            documents[i].Title = titleHighlightedText;
        }

        if (!string.IsNullOrEmpty(contentHighlightedText))
        {
            documents[i].Content = contentHighlightedText;
        }

        if (!string.IsNullOrEmpty(authorHighlightedText))
        {
            documents[i].Author = authorHighlightedText;
        }
    }
}
/// <summary>
/// Hidden constructor: seeds the helper with its default highlight settings
/// (ellipsis separator, five highlights, standard analyzer, em-tag formatter).
/// </summary>
private LuceneHighlightHelper()
{
    Separator = "...";
    MaxNumHighlights = 5;
    HighlightAnalyzer = new StandardAnalyzer(_luceneVersion);
    HighlightFormatter = new SimpleHTMLFormatter("<em>", "</em> ");
}
/// <summary>
/// Runs a paged full-text search for published posts, caching the materialized
/// result per (keyword, page, size) key, and highlights matched keywords in the
/// returned DTOs before resolving their categories.
/// </summary>
public SearchResult<PostDto> SearchPage(int page, int size, string keyword)
{
    var cacheKey = $"search:{keyword}:{page}:{size}";
    var result = _cacheManager.GetOrAdd(cacheKey, _ =>
    {
        var searchResult = SearchEngine.ScoredSearch<Post>(BuildSearchOptions(page, size, keyword));

        // Keep only published posts, de-duplicated by id, in score order.
        using var scored = searchResult.Results
            .Where(s => s.Entity.Status == Status.Published)
            .DistinctBy(s => s.Entity.Id)
            .ToPooledList();

        var idArray = scored.Select(s => s.Entity.Id).ToArray();
        var byId = GetQuery<PostDto>(p => idArray.Contains(p.Id)).ToDictionary(p => p.Id);
        var posts = scored
            .Where(s => byId.ContainsKey(s.Entity.Id))
            .Select(s => byId[s.Entity.Id])
            .ToList();

        var formatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
        var highlighter = new Highlighter(formatter, new Segment()) { FragmentSize = 200 };
        var terms = Searcher.CutKeywords(keyword);
        HighlightSegment(posts, terms, highlighter);
        SolvePostsCategory(posts);

        return new SearchResult<PostDto>()
        {
            Results = posts,
            Elapsed = searchResult.Elapsed,
            Total = searchResult.TotalHits
        };
    });
    return result;
}
/// <summary>
/// Executes a sorted, date-filtered Lucene search and stashes the highlighter
/// plus a token stream of the query text for later use by BuildSearchResult.
/// The searcher, reader and directory are always disposed, even on failure.
/// </summary>
private IList<int> Search(string text, int tipodocumentoId, string startDate, string endDate)
{
    var directory = this.GetDirectory();
    var indexReader = this.GetIndexReader(directory);
    var searcher = new IndexSearcher(indexReader);
    try
    {
        var query = this.BuildQuery(text, tipodocumentoId);
        var filter = this.BuildDateFilter(startDate, endDate);

        // Newest first by creation date.
        var sort = new Sort(new SortField("dataCriacao", SortField.LONG, true));
        var docs = searcher.Search(query, filter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sort);

        // Highlighter state consumed later by BuildSearchResult.
        var formatter = new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>");
        this.Highlighter = new Highlighter(formatter, new QueryScorer(query));
        this.Stream = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

        return this.BuildSearchResult(docs, searcher);
    }
    finally
    {
        searcher.Dispose();
        indexReader.Dispose();
        directory.Dispose();
    }
}
/// <summary>
/// Gets the highlighted wildcard text: every whitespace-separated term of the
/// query becomes an optional fuzzy clause, so near-matches are highlighted too.
/// </summary>
/// <param name="indexField">The index field value.</param>
/// <param name="searchQuery">The search query.</param>
/// <param name="highlightField">The highlight field name.</param>
/// <param name="examineIndexSetName">Name of the examine index set.</param>
/// <param name="maxNumFragments">Maximum number of fragments to retrieve.</param>
/// <param name="preTag">Highlight pre tag.</param>
/// <param name="postTag">Highlight post tag.</param>
/// <returns>The best highlighted fragments, joined by "...".</returns>
public static string GetHighlightWithWildcards(string indexField, string searchQuery, string highlightField, string examineIndexSetName, int maxNumFragments, string preTag, string postTag)
{
    if (indexField == null)
    {
        throw new ArgumentNullException(nameof(indexField));
    }

    if (searchQuery == null)
    {
        throw new ArgumentNullException(nameof(searchQuery));
    }

    if (highlightField == null)
    {
        throw new ArgumentNullException(nameof(highlightField));
    }

    if (examineIndexSetName == null)
    {
        throw new ArgumentNullException(nameof(examineIndexSetName));
    }

    // One SHOULD-ed fuzzy clause per query term (min similarity 0.5, no prefix).
    BooleanQuery combinedQuery = new BooleanQuery();
    foreach (string term in searchQuery.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        FuzzyQuery fuzzyQuery = new FuzzyQuery(new Lucene.Net.Index.Term(highlightField, term), 0.5f, 0);
        combinedQuery.Add(new BooleanClause(fuzzyQuery, BooleanClause.Occur.SHOULD));
    }

    string plainText = indexField.StripHtml();
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(preTag, postTag);

    // Rewrite expands the fuzzy clauses against the index before scoring.
    QueryScorer scorer = new QueryScorer(combinedQuery.Rewrite(GetIndexSearcher(examineIndexSetName).GetIndexReader()));
    Highlighter highlighter = new Highlighter(formatter, scorer);
    TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_29).TokenStream(highlightField, new StringReader(plainText));

    return highlighter.GetBestFragments(tokenStream, plainText, maxNumFragments, "...");
}
/// <summary>
/// Searches the index and lazily yields (score, document, best fragments)
/// tuples; an empty-tag formatter means fragments come back as plain text.
/// </summary>
public IEnumerable<Tuple<float, Document, string[]>> Search(string text, string defaultField = "title", int maxResultCount = 500)
{
    var parser = new QueryParser(Constants.Version, defaultField, _analyzer);
    Query query = parser.Parse(text ?? string.Empty);

    var highlighter = new Highlighter(new SimpleHTMLFormatter(string.Empty, string.Empty), new QueryScorer(query))
    {
        TextFragmenter = new SimpleFragmenter(120)
    };

    using (var directory = FSDirectory.Open(new DirectoryInfo(_path), new NoLockFactory()))
    {
        using (var searcher = new IndexSearcher(IndexReader.Open(directory, ReadonlyMode)))
        {
            TopDocs hits = searcher.Search(query, maxResultCount);
            foreach (var scoreDoc in hits.ScoreDocs)
            {
                Document doc = searcher.Doc(scoreDoc.Doc);
                var fieldValue = doc.Get(defaultField);
                var tokenStream = _analyzer.TokenStream(defaultField, new StringReader(fieldValue));
                var fragments = highlighter.GetBestFragments(tokenStream, fieldValue, 5);
                yield return new Tuple<float, Document, string[]>(scoreDoc.Score, doc, fragments);
            }
        }
    }
}
/// <summary>
/// Produces an HTML-encoded preview of <paramref name="text"/> with query
/// matches wrapped in <paramref name="prefix"/>/<paramref name="suffix"/>.
/// Text longer than <paramref name="maxContentHighlightLength"/> is refused
/// with a fixed message (performance guard).
/// </summary>
public static string GenerateHtmlPreviewText(Query query, string text, int length, Analyzer analyzer, string prefix = "<label class='highlight'>", string suffix = "</label>", bool returnRawContentWhenResultIsEmpty = false, int maxContentHighlightLength = Constants.DefaultMaxContentHighlightLength)
{
    if (text.Length > maxContentHighlightLength) // For performance
    {
        return "Content is too long to highlight";
    }

    string fragments = null;
    if (query != null)
    {
        // Highlight with internal placeholder tags; they are swapped for the
        // caller's tags only AFTER HTML-encoding, so the tags survive encoding.
        var highlighter = new Highlighter(
            new SimpleHTMLFormatter(CodeContentProcessing.HighLightPrefix, CodeContentProcessing.HighLightSuffix),
            new QueryScorer(query))
        {
            TextFragmenter = new SimpleFragmenter(length),
            MaxDocCharsToAnalyze = maxContentHighlightLength
        };

        var stream = analyzer.GetTokenStream(nameof(CodeSource.Content), new StringReader(text));
        fragments = highlighter.GetBestFragments(stream, text, 3, "...");
    }

    if (string.IsNullOrEmpty(fragments))
    {
        return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
    }

    return HttpUtility.HtmlEncode(fragments)
        .Replace(CodeContentProcessing.HighLightPrefix, prefix)
        .Replace(CodeContentProcessing.HighLightSuffix, suffix);
}
/// <summary>
/// Gets the highlight: wraps occurrences of the search query inside the
/// HTML-stripped index field value with the given pre/post tags.
/// </summary>
/// <param name="indexField">The index field value.</param>
/// <param name="searchQuery">The search query.</param>
/// <param name="highlightField">The highlight field name.</param>
/// <param name="examineIndexSetName">Name of the examine index set.</param>
/// <param name="maxNumFragments">Maximum number of fragments to retrieve.</param>
/// <param name="preTag">Highlight pre tag.</param>
/// <param name="postTag">Highlight post tag.</param>
/// <returns>The best highlighted fragments, joined by "...".</returns>
public static string GetHighlight(string indexField, string searchQuery, string highlightField, string examineIndexSetName, int maxNumFragments, string preTag, string postTag)
{
    if (indexField == null)
    {
        throw new ArgumentNullException(nameof(indexField));
    }

    if (searchQuery == null)
    {
        throw new ArgumentNullException(nameof(searchQuery));
    }

    if (highlightField == null)
    {
        throw new ArgumentNullException(nameof(highlightField));
    }

    if (examineIndexSetName == null)
    {
        throw new ArgumentNullException(nameof(examineIndexSetName));
    }

    string plainText = indexField.StripHtml();
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(preTag, postTag);
    Highlighter highlighter = new Highlighter(formatter, FragmentScorer(searchQuery, highlightField, examineIndexSetName));
    TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_29).TokenStream(highlightField, new StringReader(plainText));

    return highlighter.GetBestFragments(tokenStream, plainText, maxNumFragments, "...");
}
/// <summary>
/// 查询所有符合条件的内容 — searches every indexed field for each cut keyword in
/// parallel, highlighting matched fields and truncating unmatched content to
/// <paramref name="segment"/> characters.
/// </summary>
/// <param name="kw">关键词</param>
/// <param name="segment">提取长度 (max preview length per post)</param>
/// <returns>Posts with a non-empty title, de-duplicated by id.</returns>
public static IEnumerable<PostOutputDto> Search(string kw, int segment = 200)
{
    if (string.IsNullOrEmpty(IndexPath))
    {
        throw new Exception("未设置索引文件夹路径,参数名:" + IndexPath);
    }

    string indexPath = IndexPath;
    using (var analyzer = new PanGuAnalyzer())
    {
        var list = CutKeywords(kw);
        var result = new ConcurrentQueue<PostOutputDto>();
        Parallel.ForEach(list, k =>
        {
            if (k.Contains(new[] { @"\?", @"\*", @"\+", @"\-", @"\[", @"\]", @"\{", @"\}", @"\(", @"\)", "�" }))
            {
                return;
            }

            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader = IndexReader.Open(directory, true);
            var searcher = new IndexSearcher(reader);
            try
            {
                QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { nameof(Post.Id), nameof(Post.Title), nameof(Post.Content), nameof(Post.Author), nameof(Post.Label), nameof(Post.Email), nameof(Post.Keyword) }, analyzer); //多个字段查询
                Query query = parser.Parse(k);
                int n = 100000;
                TopDocs docs = searcher.Search(query, null, n);
                if (docs?.TotalHits != 0 && docs?.ScoreDocs != null)
                {
                    foreach (ScoreDoc sd in docs.ScoreDocs) //遍历搜索到的结果
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        if (result.Any(p => p.Id == doc.Get(nameof(Post.Id)).ToInt32()))
                        {
                            continue;
                        }

                        var simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
                        var highlighter = new Highlighter(simpleHtmlFormatter, new Segment())
                        {
                            FragmentSize = segment
                        };
                        var content = doc.Get(nameof(Post.Content));

                        // BUG FIX: the original shrank the shared `segment` parameter here
                        // (`segment = content.Length`), which both raced across the
                        // Parallel.ForEach workers and permanently shortened the preview
                        // of every later post. Use a per-document length instead.
                        int previewLength = Math.Min(segment, content.Length);

                        // Highlight the field when it contains the keyword, otherwise keep it raw.
                        string HighlightOr(string raw) =>
                            raw.ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, raw) : raw;

                        result.Enqueue(new PostOutputDto()
                        {
                            Id = doc.Get(nameof(Post.Id)).ToInt32(),
                            Title = HighlightOr(doc.Get(nameof(Post.Title))),
                            Content = content.ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, content) : content.Substring(0, previewLength),
                            Author = HighlightOr(doc.Get(nameof(Post.Author))),
                            Label = HighlightOr(doc.Get(nameof(Post.Label))),
                            Email = HighlightOr(doc.Get(nameof(Post.Email))),
                            Keyword = HighlightOr(doc.Get(nameof(Post.Keyword)))
                        });
                    }
                }
            }
            finally
            {
                // Resource-leak fix: the original never disposed these handles.
                searcher.Dispose();
                reader.Dispose();
                directory.Dispose();
            }
        });
        return result.Where(p => !string.IsNullOrEmpty(p.Title)).DistinctBy(p => p.Id);
    }
}
/// <summary>
/// 文本语法高亮的方法 — wraps matched keywords in red font tags.
/// </summary>
/// <param name="text">输入的文本</param>
/// <param name="keys">搜索关键字 需要高亮的文本</param>
/// <param name="analyEnum">选择分词方式</param>
/// <returns>成功返回高亮的文本 失败返回空字符串</returns>
public static string HightLightText(string text, string keys, AnalyzerEnum analyEnum)
{
    var formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

    // Build a highlighter backed by the requested analyzer; unknown values leave it null.
    Highlighter highlighter = null;
    switch (analyEnum)
    {
        case AnalyzerEnum.SimpleAnalyzer:
            highlighter = new Highlighter(formatter, new SimpleAnalyzer());
            break;

        case AnalyzerEnum.EnglishAnalyzer:
            EnglishAnalyzer english = new EnglishAnalyzer();
            english.Init();
            highlighter = new Highlighter(formatter, english);
            break;

        case AnalyzerEnum.PanGuSegment:
            highlighter = new Highlighter(formatter, new PanGuAnalyzer());
            break;

        default:
            break;
    }

    if (highlighter == null)
    {
        return string.Empty;
    }

    highlighter.FragmentSize = 500000;
    return highlighter.GetBestFragment(keys, text);
}
/// <summary>
/// Hidden constructor: seeds the helper with its default highlight settings
/// (ellipsis separator, five highlights, standard analyzer, label-span formatter).
/// </summary>
private USNLuceneHelper()
{
    Separator = "...";
    MaxNumHighlights = 5;
    HighlightAnalyzer = new StandardAnalyzer(_luceneVersion);
    HighlightFormatter = new SimpleHTMLFormatter("<span class=\"label label-primary\">", "</span>");
}
/// <summary>
/// Paged post search with a one-hour memory cache per (keyword, page, size).
/// The title and content of each pending post get the first non-empty keyword
/// highlight; unmatched content is truncated to 200 characters.
/// </summary>
public SearchResult<PostOutputDto> SearchPage(int page, int size, string keyword)
{
    var cacheKey = $"search:{keyword}:{page}:{size}";
    if (_memoryCache.TryGetValue<SearchResult<PostOutputDto>>(cacheKey, out var cached))
    {
        return cached;
    }

    var searchResult = _searchEngine.ScoredSearch<Post>(new SearchOptions(keyword, page, size, typeof(Post)));
    var posts = searchResult.Results
        .Select(r => r.Entity.Mapper<PostOutputDto>())
        .Where(p => p.Status == Status.Pended)
        .ToList();

    var highlighter = new Highlighter(
        new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>"),
        new Segment())
    {
        FragmentSize = 200
    };
    var keywords = _searcher.CutKeywords(keyword);

    // First non-empty highlight fragment produced by any keyword, or null.
    string FirstFragment(string text)
    {
        foreach (var term in keywords)
        {
            if (!text.Contains(term))
            {
                continue;
            }

            var fragment = highlighter.GetBestFragment(term, text);
            if (!string.IsNullOrEmpty(fragment))
            {
                return fragment;
            }
        }
        return null;
    }

    foreach (var post in posts)
    {
        var highlightedTitle = FirstFragment(post.Title);
        if (highlightedTitle != null)
        {
            post.Title = highlightedTitle;
        }

        var highlightedContent = FirstFragment(post.Content);
        if (highlightedContent != null)
        {
            post.Content = highlightedContent;
        }
        else if (post.Content.Length > 200)
        {
            post.Content = post.Content.Substring(0, 200);
        }
    }

    var result = new SearchResult<PostOutputDto>()
    {
        Results = posts,
        Elapsed = searchResult.Elapsed,
        Total = searchResult.TotalHits
    };
    return _memoryCache.Set(cacheKey, result, TimeSpan.FromHours(1));
}
/// <summary>
/// Gets up to three highlighted fragments ("&lt;b&gt;…&lt;/b&gt;") of the "Contents"
/// field for the given hit.
/// </summary>
public static string[] TextHighlighter(Query query, string text, StandardAnalyzer analys, IndexSearcher searcher, ScoreDoc doc)
{
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query));
    TokenStream tokenStream = TokenSources.GetAnyTokenStream(searcher.IndexReader, doc.Doc, "Contents", analys);
    return highlighter.GetBestFragments(tokenStream, text, 3);
}
/// <summary>
/// Returns a 120-character keyword-highlighted preview fragment of the body.
/// </summary>
private static string Preview(string body, string keyword)
{
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"Red\">", "</font>"), new Segment())
    {
        FragmentSize = 120
    };
    return highlighter.GetBestFragment(keyword, body);
}
/// <summary>
/// 设置关键词高亮 — highlights the query's terms inside the content using a
/// Jieba merge tokenizer seeded with those terms.
/// </summary>
/// <param name="content">待高亮的文本</param>
/// <param name="field">字段名 (unused by the current implementation)</param>
/// <param name="query">查询对象</param>
/// <returns>高亮后的最佳片段</returns>
protected string SetHighKeyWord(string content, string field, Query query)
{
    var terms = new HashSet<Term>();
    query.ExtractTerms(terms);

    var highlighter = new Highlighter(new SimpleHTMLFormatter("<span class='highlight'>", "</span>"), new QueryScorer(query));
    var tokenizer = new JiebaMergeTokenizer(terms.Select(t => t.Text()), new StringReader(content));
    return highlighter.GetBestFragment(tokenizer, content);
}
/// <summary>
/// Creates a highlighter for current query: bold tags, 150-character span fragments.
/// </summary>
private Highlighter CreateHighlighter(Query query)
{
    var scorer = new QueryScorer(query);
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
    highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer, 150);
    return highlighter;
}
/// <summary>
/// Builds an HTML-encoded highlighted preview for the content matched by
/// <paramref name="searchRequest"/>. Content longer than the index's configured
/// max highlight length is refused with a fixed message.
/// </summary>
public string GenerateHtmlPreviewText(SearchRequest searchRequest, string text, int length, string prefix = "<span class='highlight'>", string suffix = "</span>", bool returnRawContentWhenResultIsEmpty = false)
{
    if (searchRequest == null)
    {
        return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
    }

    var maintainer = GetIndexMaintainerWrapper(searchRequest.IndexPk);
    if (maintainer == null)
    {
        return string.Empty;
    }

    var queryForContent = GetContentQuery(searchRequest, maintainer);

    var maxContentHighlightLength = maintainer.IndexConfig.MaxContentHighlightLength;
    if (maxContentHighlightLength <= 0)
    {
        maxContentHighlightLength = Constants.DefaultMaxContentHighlightLength;
    }

    if (text.Length > maxContentHighlightLength) // For performance
    {
        return "Content is too long to highlight";
    }

    string fragments = null;
    if (queryForContent != null)
    {
        // Highlight with internal placeholder tags; swapped for the caller's
        // tags only after HTML-encoding so the tags survive encoding.
        var highlighter = new Highlighter(new SimpleHTMLFormatter(HighLightPrefix, HighLightSuffix), new QueryScorer(queryForContent))
        {
            TextFragmenter = new SimpleFragmenter(length),
            MaxDocCharsToAnalyze = maxContentHighlightLength
        };
        using var stream = GetTokenStream(text, searchRequest.CaseSensitive);
        fragments = highlighter.GetBestFragments(stream, text, 3, "...");
    }

    if (string.IsNullOrEmpty(fragments))
    {
        return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
    }

    return HttpUtility.HtmlEncode(fragments).Replace(HighLightPrefix, prefix).Replace(HighLightSuffix, suffix);
}
/// <summary>
/// 搜索结果高亮显示 — highlights the keyword inside the content and falls back
/// to the raw content when no fragment is produced.
/// </summary>
/// <param name="keyword">关键字</param>
/// <param name="content">搜索结果</param>
/// <param name="analyzer">new SimpleAnalyzer()</param>
/// <returns>高亮后的文本, 无匹配时返回原文</returns>
public static string HighLight(string keyword, string content, Analyzer analyzer)
{
    const string FIELD_NAME = "keyword";
    Query query = new QueryParserEx(Lucene.Net.Util.Version.LUCENE_30, FIELD_NAME, analyzer).Parse(keyword);
    QueryScorer scorer = new QueryScorer(query);

    var highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, END_TAG), scorer)
    {
        TextFragmenter = new SimpleSpanFragmenter(scorer)
    };
    return highlighter.GetBestFragment(analyzer, FIELD_NAME, content) ?? content;
}
/// <summary>
/// 搜索结果高亮显示
/// </summary>
/// <param name="keyword"> 关键字 </param>
/// <param name="content"> 搜索结果 </param>
/// <returns> 高亮后结果 </returns>
public static string HighLight(string keyword, string content)
{
    // HTMLFormatter supplies the tag pair wrapped around each matched keyword;
    // Segment is the PanGu word-splitting backend.
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new Segment())
    {
        FragmentSize = 100 // 每个摘要段的字符数
    };
    return highlighter.GetBestFragment(keyword, content);
}
/// <summary>
/// Searches the Title/Content index for <paramref name="keywords"/> (AND of all
/// terms, phrase slop 3), filtered to NewsId 1..10 and sorted by OrderId
/// descending, returning up to 1000 news items with a highlighted 120-char
/// content fragment.
/// </summary>
public List<News> Search(string keywords)
{
    Directory dir = FSDirectory.Open(new io.DirectoryInfo(HttpContext.Current.Server.MapPath("/Indexs/")), new SimpleFSLockFactory());
    IndexReader reader = IndexReader.Open(dir, true);
    IndexSearcher search = new IndexSearcher(reader);
    try
    {
        MultiFieldQueryParser multifield = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Title", "Content" }, new PanGuAnalyzer());
        multifield.PhraseSlop = 3;
        multifield.DefaultOperator = QueryParser.Operator.AND;
        Query muqu = multifield.Parse(keywords);

        NumericRangeFilter<int> filter = NumericRangeFilter.NewIntRange("NewsId", 1, 10, true, true);
        Sort sort = new Sort();
        sort.SetSort(new SortField("OrderId", SortField.LONG, true));

        TopFieldDocs fields = search.Search(muqu, filter, 1000, sort);
        ScoreDoc[] docs = fields.ScoreDocs;

        // Hoisted out of the loop: the formatter/highlighter are stateless per hit.
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style=\"color:red\">", "</span>");
        Highlighter high = new Highlighter(formatter, new PanGu.Segment());
        high.FragmentSize = 120;

        List<News> newslist = new List<News>();
        for (int i = 0; i < docs.Length; i++)
        {
            Document doc = search.Doc(docs[i].Doc);
            News news = new News();
            news.NewsId = Convert.ToInt32(doc.Get("NewsId"));
            news.Title = doc.Get("Title");
            news.Content = high.GetBestFragment(keywords, doc.Get("Content"));
            news.AddTime = Convert.ToDateTime(doc.Get("Date"));
            news.OrderId = Convert.ToInt64(doc.Get("OrderId"));
            newslist.Add(news);
        }
        return newslist;
    }
    finally
    {
        // Fix: the original leaked the searcher, reader and directory handles
        // on every call (and on any exception thrown while searching).
        search.Dispose();
        reader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Paged post search with a one-hour cache per (keyword, page, size). Mapped
/// DTOs are enriched with category/statistics fields loaded from the store,
/// then keyword-highlighted.
/// </summary>
public SearchResult<PostDto> SearchPage(int page, int size, string keyword)
{
    var cacheKey = $"search:{keyword}:{page}:{size}";
    if (_cacheManager.Exists(cacheKey))
    {
        return _cacheManager.Get(cacheKey);
    }

    var searchResult = SearchEngine.ScoredSearch<Post>(BuildSearchOptions(page, size, keyword));
    var entities = searchResult.Results.Where(s => s.Entity.Status == Status.Published).ToList();
    var ids = entities.Select(s => s.Entity.Id).ToArray();
    var lookup = GetQuery<PostDto>(p => ids.Contains(p.Id)).ToDictionary(p => p.Id);

    var posts = entities.Select(s =>
    {
        var dto = _mapper.Map<PostDto>(s.Entity);
        if (lookup.TryGetValue(dto.Id, out var stored))
        {
            // Copy the store-backed fields the search index does not carry.
            dto.CategoryName = stored.CategoryName;
            dto.ModifyDate = stored.ModifyDate;
            dto.CommentCount = stored.CommentCount;
            dto.TotalViewCount = stored.TotalViewCount;
            dto.CategoryId = stored.CategoryId;
        }
        return dto;
    }).ToList();

    var highlighter = new Highlighter(
        new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>"),
        new Segment())
    {
        FragmentSize = 200
    };
    HighlightSegment(posts, Searcher.CutKeywords(keyword), highlighter);

    var result = new SearchResult<PostDto>()
    {
        Results = posts,
        Elapsed = searchResult.Elapsed,
        Total = searchResult.TotalHits
    };
    _cacheManager.Add(cacheKey, result);
    _cacheManager.Expire(cacheKey, TimeSpan.FromHours(1));
    return result;
}
/// <summary>
/// 创建HTMLFormatter,参数为高亮单词的前后缀 — returns the best 150-character
/// highlighted fragment of the content for the given keywords.
/// </summary>
/// <param name="keywords">关键词</param>
/// <param name="Content">内容</param>
/// <returns>最匹配的摘要段</returns>
public static string CreateHightLight(string keywords, string Content)
{
    var formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

    // Highlighter backed by the PanGu Segment word splitter.
    var highlighter = new Highlighter(formatter, new Segment());
    highlighter.FragmentSize = 150; // 每个摘要段的字符数
    return highlighter.GetBestFragment(keywords, Content);
}
/// <summary>
/// 设置高亮 — replaces this.Value with a highlighted fragment when one is
/// produced; leaves it untouched otherwise.
/// </summary>
/// <param name="keywords">关键字</param>
/// <param name="matchField">字段信息 (tags and fragment size)</param>
public void SetHighLight(string keywords, LnMatchField matchField)
{
    var highlighter = new Highlighter(new SimpleHTMLFormatter(matchField.KeywordPrefix, matchField.KeywordSuffix), new Segment())
    {
        // A configured fragment size below 1 means "use the whole value".
        FragmentSize = matchField.FragmentSize < 1 ? this.Value.Length : matchField.FragmentSize
    };

    var highlighted = highlighter.GetBestFragment(keywords, this.Value);
    if (string.IsNullOrEmpty(highlighted) == false)
    {
        this.Value = highlighted;
    }
}
/// <summary>
/// 文章高亮关键词处理 — parses the post's HTML content, walks every child node
/// of each &lt;p&gt; element, and replaces the first keyword match per node with a
/// highlighted fragment, then writes the mutated DOM back to the post.
/// </summary>
/// <param name="p">文章 (its Content is read and overwritten)</param>
/// <param name="keyword">关键词 (metacharacters are stripped before cutting)</param>
public async Task Highlight(Post p, string keyword)
{
    try
    {
        var simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
        // FragmentSize = int.MaxValue: keep the node's full text, never truncate.
        var highlighter = new Highlighter(simpleHtmlFormatter, new Segment()) { FragmentSize = int.MaxValue };
        // Strip angle brackets and bracket characters before keyword cutting.
        keyword = Regex.Replace(keyword, @"<|>|\(|\)|\{|\}|\[|\]", " ");
        var keywords = Searcher.CutKeywords(keyword);
        var context = BrowsingContext.New(Configuration.Default);
        var document = await context.OpenAsync(req => req.Content(p.Content));
        var elements = document.DocumentElement.GetElementsByTagName("p");
        foreach (var e in elements)
        {
            // ChildNodes is a live collection; iteration is by index because
            // ReplaceChild below mutates the node list in place.
            for (var index = 0; index < e.ChildNodes.Length; index++)
            {
                var node = e.ChildNodes[index];
                bool handled = false;
                foreach (var s in keywords)
                {
                    string frag;
                    // Only the first keyword yielding a non-empty fragment is applied per node.
                    if (handled == false && node.TextContent.Contains(s, StringComparison.CurrentCultureIgnoreCase) && !string.IsNullOrEmpty(frag = highlighter.GetBestFragment(s, node.TextContent)))
                    {
                        switch (node)
                        {
                            case IElement el:
                                // Element node: swap its inner HTML for the highlighted fragment.
                                el.InnerHtml = frag;
                                handled = true;
                                break;

                            case IText t:
                                // Text node: parse the fragment into a body and substitute it
                                // for the text node in the parent paragraph.
                                var parser = new HtmlParser();
                                var parseDoc = parser.ParseDocument(frag).Body;
                                e.ReplaceChild(parseDoc, t);
                                handled = true;
                                break;
                        }
                    }
                }
            }
        }

        // Serialize the mutated DOM back onto the post.
        p.Content = document.Body.InnerHtml;
    }
    catch (Exception e)
    {
        // NOTE(review): best-effort — any failure is swallowed and only written
        // to the console, leaving p.Content unchanged.
        Console.WriteLine(e);
    }
}
/// <summary>
/// Returns up to three 150-character fragments of <paramref name="text"/> with
/// query matches wrapped in highlight spans, joined by "...".
/// </summary>
private string HighlightContents(Query q, string text)
{
    const string highlightStartTag = "<span class='highlight'>";
    const string highlightEndTag = "</span>";
    const int fragmentLength = 150;

    QueryScorer scorer = new QueryScorer(q, Body);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(highlightStartTag, highlightEndTag), scorer);
    highlighter.SetTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream stream = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29).TokenStream(Body, new StringReader(text));
    return highlighter.GetBestFragments(stream, text, 3, "...");
}
/// <summary>
/// Highlights the field: returns the best fragments of the field value with
/// current-query matches wrapped in the given tags.
/// </summary>
/// <param name="fieldName">Name of the field.</param>
/// <param name="fieldValue">The field value.</param>
/// <param name="startTag">The start tag.</param>
/// <param name="endTag">The end tag.</param>
/// <param name="fragmentLength">Length of the fragment.</param>
/// <param name="numberOfFragments">The number of fragments.</param>
/// <returns>The highlighted fragments.</returns>
protected string[] HighlightField(string fieldName, string fieldValue, string startTag = "<strong>", string endTag = "</strong>", int fragmentLength = 150, int numberOfFragments = 1)
{
    IFormatter formatter = new SimpleHTMLFormatter(startTag, endTag);
    var highlighter = new Highlighter(formatter, new Lucene.Net.Search.Highlight.QueryScorer(Query))
    {
        TextFragmenter = new SimpleFragmenter(fragmentLength)
    };

    // Each field may be tokenized by its own analyzer.
    var specificAnalyzer = GetAnalyzer(fieldName);
    TokenStream stream = specificAnalyzer.TokenStream(fieldName, new StringReader(fieldValue));
    return highlighter.GetBestFragments(stream, fieldValue, numberOfFragments);
}
/// <summary>
/// Parses and runs <paramref name="query"/> over the given fields (AND of all
/// terms, leading wildcards allowed), returning the total hit count plus a
/// mark-tag highlighted preview per result.
/// </summary>
private SearchResult SearchCore(string query, string[] fields, int topResultCount)
{
    // Search
    var parser = new MultiFieldQueryParser(Config.LuceneVersion, fields, Analyzer)
    {
        AllowLeadingWildcard = true,
        DefaultOperator = Operator.AND,
        Locale = Config.Locale,
        AnalyzeRangeTerms = true
    };

    var parsed = parser.Parse(query);
    var results = Searcher.Search(parsed, topResultCount);
    var hits = results.ScoreDocs;
    if (results.TotalHits == 0)
    {
        return SearchResult.Empty;
    }

    // Format
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<mark>", "</mark>"), new QueryScorer(parsed))
    {
        TextFragmenter = new SimpleFragmenter(Config.FragmentLength)
    };

    var items = new List<SearchResultItem>();
    foreach (var hit in hits)
    {
        var doc = Searcher.Doc(hit.Doc);
        var url = doc.Get("url");
        var title = doc.Get("title");
        var content = doc.Get("content");
        using (var stream = Analyzer.GetTokenStream(url, new StringReader(content)))
        {
            var preview = highlighter.GetBestFragments(stream, content, Config.ResultFragments, Config.FragmentSeparator);
            items.Add(new SearchResultItem(url, ToWbrWrapName(title), preview));
        }
    }

    return new SearchResult(results.TotalHits, items);
}
/// <summary>
/// 搜索结果高亮显示
/// PS:需要添加PanGu.HighLight.dll的引用
/// </summary>
/// <param name="keyword"> 关键字 </param>
/// <param name="content"> 搜索结果 </param>
/// <returns> 高亮后结果 </returns>
public static string HighLight(string keyword, string content)
{
    // HTMLFormatter supplies the tag pair wrapped around each matched keyword.
    var formatter = new SimpleHTMLFormatter("<font style=\"font-style:normal;font-weight:bold;color:#cc0000;\"><b>", "</b></font>");

    // Highlighter backed by the PanGu Segment word splitter.
    var highlighter = new Highlighter(formatter, new Segment())
    {
        FragmentSize = 1000 // 每个摘要段的字符数
    };
    return highlighter.GetBestFragment(keyword, content);
}
/// <summary>
/// Builds a plain-text preview (empty highlight tags) of up to two 100-char
/// fragments matching <paramref name="q"/>; falls back to the leading text when
/// nothing matches.
/// </summary>
public string GeneratePreviewText(Lucene.Net.Search.Query q, string text)
{
    QueryScorer scorer = new QueryScorer(q);
    IFormatter formatter = new SimpleHTMLFormatter("", "");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = new SimpleFragmenter(100);

    TokenStream stream = new SimpleAnalyzer().TokenStream(TEXT_FN, new StringReader(text));
    string fragment = highlighter.GetBestFragments(stream, text, 2, "...");
    if (string.IsNullOrEmpty(fragment))
    {
        // Fix: text.Substring(0, 100) threw ArgumentOutOfRangeException whenever
        // the text was shorter than 100 characters; clamp to the text length.
        fragment = text.Substring(0, Math.Min(100, text.Length));
    }

    return fragment;
}
/// <summary>
/// Maps search hits to Book items, highlighting Author/Name/FileName/Content
/// with red spans and falling back to the stored value when no fragment is
/// produced. Returns an anonymous { Items, TotalCount } payload.
/// </summary>
private object GetResultData(IndexSearcher searcher, TopDocs docs, Query query)
{
    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
    highlighter.TextFragmenter = new SimpleFragmenter(150);
    Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search);

    var result = new List<Book>();
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);

        // Best highlight for the field, or the raw stored value when the
        // highlighter returns nothing (de-duplicates the original's four
        // copy-pasted highlight-or-fallback blocks).
        string HighlightOrRaw(string field)
        {
            var raw = doc.Get(field);
            var highlighted = highlighter.GetBestFragment(analyzer, field, raw);
            return string.IsNullOrWhiteSpace(highlighted) ? raw : highlighted;
        }

        result.Add(new Book()
        {
            Id = doc.Get("Id"),
            Author = HighlightOrRaw("Author"),
            Name = HighlightOrRaw("Name"),
            FileName = HighlightOrRaw("FileName"),
            Content = HighlightOrRaw("Content")
        });
    }

    return new { Items = result, TotalCount = docs.TotalHits };
}
/// <summary>
/// Converts search-result rows into DDocInfo models; when
/// <paramref name="isHighlight"/> is set, the title and description are
/// keyword-highlighted with the analyzers configured by name.
/// </summary>
/// <param name="ds">Result set; table 0 holds the document rows.</param>
/// <param name="keyWords">Keywords to highlight.</param>
/// <param name="isHighlight">Whether to highlight title/description.</param>
/// <returns>ArrayList of populated Model.DDocInfo items.</returns>
public ArrayList DataToList(DataSet ds, string keyWords, bool isHighlight)
{
    ArrayList result = new ArrayList();
    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        Model.DDocInfo doc = new TMM.Model.DDocInfo();
        doc.Title = row["Title"].ToString();
        doc.Description = row["Description"].ToString();
        doc.DocType = row["DocType"].ToString();
        doc.DocId = int.Parse(row["DocumentId"].ToString());
        doc.UserId = int.Parse(row["UserId"].ToString());
        doc.CreateTime = DateTime.Parse(row["CreateTime"].ToString());
        doc.UpCount = int.Parse(row["UpCount"].ToString());
        doc.ViewCount = int.Parse(row["ViewCount"].ToString());

        if (isHighlight)
        {
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

            // Analyzer selection by configured name (case-insensitive);
            // SimpleAnalyzer is the default. De-duplicates the original's two
            // copy-pasted if/else chains for title and description.
            Highlighter CreateHighlighter(string analyzerName)
            {
                if (analyzerName.Equals("PanGuSegment", StringComparison.CurrentCultureIgnoreCase))
                {
                    return new Highlighter(simpleHTMLFormatter, new PanGuAnalyzer());
                }
                if (analyzerName.Equals("EnglishAnalyzer", StringComparison.CurrentCultureIgnoreCase))
                {
                    return new Highlighter(simpleHTMLFormatter, new Hubble.Core.Analysis.EnglishAnalyzer());
                }
                return new Highlighter(simpleHTMLFormatter, new Hubble.Core.Analysis.SimpleAnalyzer());
            }

            Highlighter titleHighlighter = CreateHighlighter(titleAnalyzerName);
            Highlighter contentHighlighter = CreateHighlighter(descAnalyzerName);
            titleHighlighter.FragmentSize = 50;
            contentHighlighter.FragmentSize = 50;

            doc.SearchSummary = contentHighlighter.GetBestFragment(keyWords, doc.Description);

            string titleHighlight = titleHighlighter.GetBestFragment(keyWords, doc.Title);
            if (!string.IsNullOrEmpty(titleHighlight))
            {
                doc.Title = titleHighlight;
            }
        }

        result.Add(doc);
    }
    return result;
}
/// <summary>
/// Verifies that MaxDocCharsToAnalyze truncates analysis: a match appended past
/// the first 100 characters of a long, stopword-padded document must not grow
/// the highlighted output.
/// </summary>
public void TestMaxSizeHighlightTruncates()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        var goodWord = "goodtoken";
        var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] { "stoppedtoken" });
        var query = new TermQuery(new Term("data", goodWord));

        // One good token followed by ~10k copies of a single stopword.
        StringBuilder sb = new StringBuilder();
        sb.Append(goodWord);
        for (int i = 0; i < 10000; i++)
        {
            sb.Append(" ");
            // only one stopword
            sb.Append(stopWords.First());
        }

        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = helper.GetHighlighter(
            query,
            "data",
            new StandardAnalyzer(TEST_VERSION, stopWords).TokenStream("data", new StringReader(sb.ToString())),
            fm);
        hg.TextFragmenter = new NullFragmenter();
        hg.MaxDocCharsToAnalyze = 100;

        string match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data", sb.ToString());
        Assert.IsTrue(match.Length < hg.MaxDocCharsToAnalyze,
                      "Matched text should be no more than 100 chars in length ");

        // Append another good token far beyond the analyzed window (after the
        // large slug of stopwords + whitespace); it must be ignored.
        sb.Append(" ");
        sb.Append(goodWord);
        match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data", sb.ToString());
        Assert.IsTrue(match.Length < hg.MaxDocCharsToAnalyze,
                      "Matched text should be no more than 100 chars in length ");
    };
    helper.Start();
}
/// <summary>
/// Verifies that a match ending exactly at the MaxDocCharsToAnalyze boundary
/// still highlights and keeps the remainder of the text in the fragment.
/// </summary>
public void TestMaxSizeEndHighlight()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] { "in", "it" });
        TermQuery query = new TermQuery(new Term("text", "searchterm"));
        String text = "this is a text with searchterm in it";

        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = helper.GetHighlighter(
            query,
            "text",
            new StandardAnalyzer(TEST_VERSION, stopWords).TokenStream("text", new StringReader(text)),
            fm);
        hg.TextFragmenter = new NullFragmenter();
        hg.MaxDocCharsToAnalyze = 36;

        String match = hg.GetBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "text", text);
        Assert.IsTrue(match.EndsWith("in it"),
                      "Matched text should contain remainder of text after highlighted query ");
    };
    helper.Start();
}
/*
 * This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
 * @throws InvalidTokenOffsetsException
 */
private static String HighlightField(Query query, String fieldName, String text)
{
    TokenStream tokenStream = new StandardAnalyzer(TEST_VERSION).TokenStream(fieldName, new StringReader(text));

    // The default SimpleHTMLFormatter highlights with "<B>"/"</B>".
    var highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query, fieldName, FIELD_NAME))
    {
        TextFragmenter = new SimpleFragmenter(int.MaxValue)
    };

    String fragment = highlighter.GetBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
    return fragment.Length == 0 ? text : fragment;
}