示例#1
0
        /// <summary>
        /// Replaces the title, content and author of the leading search hits with
        /// their highlighted versions.
        /// </summary>
        /// <param name="documents">Result models; element <c>i</c> is updated from hit <c>i</c>.</param>
        /// <param name="hits">Search hits whose document ids are passed to <c>HighlightText</c>.</param>
        /// <param name="query">Query used to score the highlighted fragments.</param>
        /// <param name="searcher">Searcher forwarded to <c>HighlightText</c>.</param>
        /// <param name="analyzer">Analyzer forwarded to <c>HighlightText</c>.</param>
        private static void HighlightDocuments(List<Document> documents, TopDocs hits, Query query, IndexSearcher searcher, Analyzer analyzer)
        {
            // Only the first ten hits are highlighted.
            const int maxHighlightedHits = 10;

            var formatter   = new SimpleHTMLFormatter("<u><b>", "</b></u>");
            var highlighter = new Highlighter(formatter, new QueryScorer(query));

            // Stop at the shortest of: hit count, model count, highlight cap.
            // Without the documents.Count bound a shorter model list would throw
            // ArgumentOutOfRangeException.
            for (int i = 0; i < hits.ScoreDocs.Length && i < documents.Count && i < maxHighlightedHits; i++)
            {
                int docId = hits.ScoreDocs[i].Doc;

                string highlightedTitle   = HighlightText(docId, "title", highlighter, searcher, analyzer);
                string highlightedContent = HighlightText(docId, "content", highlighter, searcher, analyzer);
                string highlightedAuthor  = HighlightText(docId, "author", highlighter, searcher, analyzer);

                // Keep the stored value when there is nothing to highlight; a null
                // result must not erase the original text either.
                if (!string.IsNullOrEmpty(highlightedTitle))
                {
                    documents[i].Title = highlightedTitle;
                }
                if (!string.IsNullOrEmpty(highlightedContent))
                {
                    documents[i].Content = highlightedContent;
                }
                if (!string.IsNullOrEmpty(highlightedAuthor))
                {
                    documents[i].Author = highlightedAuthor;
                }
            }
        }
示例#2
0
 /// <summary>
 /// Initializes the helper with its built-in highlighting settings.
 /// </summary>
 private LuceneHighlightHelper()
 {
     var analyzer  = new StandardAnalyzer(_luceneVersion);
     // Matched terms are wrapped in <em> tags followed by a non-breaking space.
     var formatter = new SimpleHTMLFormatter("<em>", "</em>&nbsp;");

     Separator = "...";
     MaxNumHighlights = 5;
     HighlightAnalyzer = analyzer;
     HighlightFormatter = formatter;
 }
示例#3
0
        /// <summary>
        /// Returns one page of highlighted search results for <paramref name="keyword"/>,
        /// obtained through the cache manager's <c>GetOrAdd</c>.
        /// </summary>
        /// <param name="page">Page number forwarded to <c>BuildSearchOptions</c>.</param>
        /// <param name="size">Page size forwarded to <c>BuildSearchOptions</c>.</param>
        /// <param name="keyword">Raw search text.</param>
        /// <returns>The published posts matching the keyword, with elapsed time and total hit count.</returns>
        public SearchResult<PostDto> SearchPage(int page, int size, string keyword)
        {
            string cacheKey = $"search:{keyword}:{page}:{size}";

            return _cacheManager.GetOrAdd(cacheKey, _ =>
            {
                var searchResult = SearchEngine.ScoredSearch<Post>(BuildSearchOptions(page, size, keyword));

                // Keep published hits only, one per post id.
                using var published = searchResult.Results
                    .Where(hit => hit.Entity.Status == Status.Published)
                    .DistinctBy(hit => hit.Entity.Id)
                    .ToPooledList();

                var publishedIds = published.Select(hit => hit.Entity.Id).ToArray();
                var dtoById      = GetQuery<PostDto>(p => publishedIds.Contains(p.Id)).ToDictionary(p => p.Id);

                // Preserve the search engine's ordering and drop hits that have no DTO.
                var posts = published
                    .Where(hit => dtoById.ContainsKey(hit.Entity.Id))
                    .Select(hit => dtoById[hit.Entity.Id])
                    .ToList();

                var formatter   = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
                var highlighter = new Highlighter(formatter, new Segment());
                highlighter.FragmentSize = 200;

                HighlightSegment(posts, Searcher.CutKeywords(keyword), highlighter);
                SolvePostsCategory(posts);

                var pageResult = new SearchResult<PostDto>();
                pageResult.Results = posts;
                pageResult.Elapsed = searchResult.Elapsed;
                pageResult.Total   = searchResult.TotalHits;
                return pageResult;
            });
        }
示例#4
0
        /// <summary>
        /// Runs a date-filtered search sorted by <c>dataCriacao</c> (descending) and
        /// prepares the highlighter state used while building the result.
        /// </summary>
        /// <param name="text">Search text; also used to create the highlight token stream.</param>
        /// <param name="tipodocumentoId">Document type id forwarded to <c>BuildQuery</c>.</param>
        /// <param name="startDate">Start date forwarded to <c>BuildDateFilter</c>.</param>
        /// <param name="endDate">End date forwarded to <c>BuildDateFilter</c>.</param>
        /// <returns>The list produced by <c>BuildSearchResult</c> for the matching documents.</returns>
        private IList<int> Search(string text, int tipodocumentoId, string startDate, string endDate)
        {
            // Stacked using blocks release searcher, reader and directory (in that
            // order) even when opening the reader or the searcher itself throws;
            // previously those were acquired outside the try/finally and could leak.
            using (var directory = this.GetDirectory())
            using (var indexReader = this.GetIndexReader(directory))
            using (var searcher = new IndexSearcher(indexReader))
            {
                var query  = this.BuildQuery(text, tipodocumentoId);
                var filter = this.BuildDateFilter(startDate, endDate);
                var sort   = new Sort(new SortField("dataCriacao", SortField.LONG, true));

                var docs = searcher.Search(query, filter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sort);

                // Set up the highlighter state before the results are built.
                var formatter = new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>");
                var scorer    = new QueryScorer(query);
                this.Highlighter = new Highlighter(formatter, scorer);
                this.Stream      = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

                return this.BuildSearchResult(docs, searcher);
            }
        }
        /// <summary>
        /// Highlights <paramref name="indexField"/> using one fuzzy query per
        /// space-separated term of <paramref name="searchQuery"/>.
        /// </summary>
        /// <param name="indexField">The index field value; HTML is stripped before highlighting.</param>
        /// <param name="searchQuery">The search query, split on spaces into terms.</param>
        /// <param name="highlightField">The name of the field the terms are matched against.</param>
        /// <param name="examineIndexSetName">Name of the examine index set whose reader is used to rewrite the query.</param>
        /// <param name="maxNumFragments">Maximum number of fragments to retrieve.</param>
        /// <param name="preTag">Markup inserted before a highlighted term.</param>
        /// <param name="postTag">Markup inserted after a highlighted term.</param>
        /// <returns>The best fragments joined with <c>"..."</c>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="indexField"/>, <paramref name="searchQuery"/>,
        /// <paramref name="highlightField"/> or <paramref name="examineIndexSetName"/> is null.
        /// </exception>
        public static string GetHighlightWithWildcards(string indexField, string searchQuery, string highlightField, string examineIndexSetName, int maxNumFragments, string preTag, string postTag)
        {
            if (indexField == null)
            {
                throw new ArgumentNullException(nameof(indexField));
            }
            if (searchQuery == null)
            {
                throw new ArgumentNullException(nameof(searchQuery));
            }
            if (highlightField == null)
            {
                throw new ArgumentNullException(nameof(highlightField));
            }
            if (examineIndexSetName == null)
            {
                throw new ArgumentNullException(nameof(examineIndexSetName));
            }

            // OR together one fuzzy query per term.
            var anyTermQuery = new BooleanQuery();
            string[] words   = searchQuery.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string word in words)
            {
                var fieldTerm = new Lucene.Net.Index.Term(highlightField, word);
                var fuzzy     = new FuzzyQuery(fieldTerm, 0.5f, 0);
                anyTermQuery.Add(new BooleanClause(fuzzy, BooleanClause.Occur.SHOULD));
            }

            string plainText = indexField.StripHtml();
            var formatter    = new SimpleHTMLFormatter(preTag, postTag);

            // The query is rewritten against the index reader before it is used for scoring.
            var indexReader = GetIndexSearcher(examineIndexSetName).GetIndexReader();
            var scorer      = new QueryScorer(anyTermQuery.Rewrite(indexReader));
            var highlighter = new Highlighter(formatter, scorer);

            var analyzer       = new StandardAnalyzer(Version.LUCENE_29);
            TokenStream tokens = analyzer.TokenStream(highlightField, new StringReader(plainText));

            return highlighter.GetBestFragments(tokens, plainText, maxNumFragments, "...");
        }
        /// <summary>
        /// Lazily searches the index and yields each hit with its score, its stored
        /// document and up to five fragments of the default field.
        /// </summary>
        /// <param name="text">Query text; <c>null</c> is parsed as an empty string.</param>
        /// <param name="defaultField">Field the parser searches by default and whose stored value is highlighted.</param>
        /// <param name="maxResultCount">Maximum number of hits requested from the searcher.</param>
        /// <returns>
        /// A deferred sequence of (score, document, fragments). The index is opened
        /// when enumeration starts and released when it completes or is disposed.
        /// </returns>
        public IEnumerable<Tuple<float, Document, string[]>> Search(string text, string defaultField = "title", int maxResultCount = 500)
        {
            var   parser = new QueryParser(Constants.Version, defaultField, _analyzer);
            Query query  = parser.Parse(text ?? string.Empty);

            // Empty pre/post tags: fragments are returned without markup.
            var formatter   = new SimpleHTMLFormatter(string.Empty, string.Empty);
            var fragmenter  = new SimpleFragmenter(120);
            var scorer      = new QueryScorer(query);
            var highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = fragmenter
            };

            // The reader is disposed explicitly: an IndexSearcher built from an
            // existing reader does not close that reader when it is disposed.
            using (var directory = FSDirectory.Open(new DirectoryInfo(_path), new NoLockFactory()))
            using (var reader = IndexReader.Open(directory, ReadonlyMode))
            using (var searcher = new IndexSearcher(reader))
            {
                TopDocs hits = searcher.Search(query, maxResultCount);

                foreach (var scoreDoc in hits.ScoreDocs)
                {
                    Document doc = searcher.Doc(scoreDoc.Doc);

                    // The query may target other fields, so a hit is not guaranteed
                    // to carry a stored value for the default field.
                    string fieldValue = doc.Get(defaultField);
                    string[] fragments;
                    if (fieldValue == null)
                    {
                        fragments = new string[0];
                    }
                    else
                    {
                        var tokenStream = _analyzer.TokenStream(defaultField, new StringReader(fieldValue));
                        fragments = highlighter.GetBestFragments(tokenStream, fieldValue, 5);
                    }

                    yield return new Tuple<float, Document, string[]>(scoreDoc.Score, doc, fragments);
                }
            }
        }
示例#7
0
        /// <summary>
        /// Builds an HTML-encoded preview of <paramref name="text"/> with the terms
        /// of <paramref name="query"/> wrapped in <paramref name="prefix"/> and <paramref name="suffix"/>.
        /// </summary>
        /// <param name="query">Query to highlight; when null no highlighting is attempted.</param>
        /// <param name="text">Content to preview.</param>
        /// <param name="length">Fragment size passed to the fragmenter.</param>
        /// <param name="analyzer">Analyzer used to tokenize <paramref name="text"/>.</param>
        /// <param name="prefix">Markup placed before each highlighted term in the output.</param>
        /// <param name="suffix">Markup placed after each highlighted term in the output.</param>
        /// <param name="returnRawContentWhenResultIsEmpty">When true, the encoded full text is returned if nothing was highlighted.</param>
        /// <param name="maxContentHighlightLength">Texts longer than this are not highlighted.</param>
        /// <returns>The preview HTML, an empty string, or a fixed notice when the text is too long.</returns>
        public static string GenerateHtmlPreviewText(Query query, string text, int length, Analyzer analyzer, string prefix = "<label class='highlight'>", string suffix = "</label>", bool returnRawContentWhenResultIsEmpty = false, int maxContentHighlightLength = Constants.DefaultMaxContentHighlightLength)
        {
            // Oversized content is skipped for performance.
            if (text.Length > maxContentHighlightLength)
            {
                return "Content is too long to highlight";
            }

            string fragments = null;

            if (query != null)
            {
                var scorer      = new QueryScorer(query);
                var formatter   = new SimpleHTMLFormatter(CodeContentProcessing.HighLightPrefix, CodeContentProcessing.HighLightSuffix);
                var highlighter = new Highlighter(formatter, scorer)
                {
                    TextFragmenter       = new SimpleFragmenter(length),
                    MaxDocCharsToAnalyze = maxContentHighlightLength
                };

                var tokens = analyzer.GetTokenStream(nameof(CodeSource.Content), new StringReader(text));
                fragments  = highlighter.GetBestFragments(tokens, text, 3, "...");
            }

            if (string.IsNullOrEmpty(fragments))
            {
                return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
            }

            // Encode first, then swap the placeholder markers for the caller's markup.
            return HttpUtility.HtmlEncode(fragments)
                .Replace(CodeContentProcessing.HighLightPrefix, prefix)
                .Replace(CodeContentProcessing.HighLightSuffix, suffix);
        }
        /// <summary>
        /// Highlights the terms of <paramref name="searchQuery"/> inside
        /// <paramref name="indexField"/> and returns the best fragments.
        /// </summary>
        /// <param name="indexField">The index field value; HTML is stripped before highlighting.</param>
        /// <param name="searchQuery">The search query passed to <c>FragmentScorer</c>.</param>
        /// <param name="highlightField">The highlight field name.</param>
        /// <param name="examineIndexSetName">Name of the examine index set.</param>
        /// <param name="maxNumFragments">Maximum number of fragments to retrieve.</param>
        /// <param name="preTag">Markup inserted before a highlighted term.</param>
        /// <param name="postTag">Markup inserted after a highlighted term.</param>
        /// <returns>The best fragments joined with <c>"..."</c>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="indexField"/>, <paramref name="searchQuery"/>,
        /// <paramref name="highlightField"/> or <paramref name="examineIndexSetName"/> is null.
        /// </exception>
        public static string GetHighlight(string indexField, string searchQuery, string highlightField, string examineIndexSetName, int maxNumFragments, string preTag, string postTag)
        {
            if (indexField == null)
            {
                throw new ArgumentNullException(nameof(indexField));
            }
            if (searchQuery == null)
            {
                throw new ArgumentNullException(nameof(searchQuery));
            }
            if (highlightField == null)
            {
                throw new ArgumentNullException(nameof(highlightField));
            }
            if (examineIndexSetName == null)
            {
                throw new ArgumentNullException(nameof(examineIndexSetName));
            }

            string plainText = indexField.StripHtml();

            var formatter   = new SimpleHTMLFormatter(preTag, postTag);
            var scorer      = FragmentScorer(searchQuery, highlightField, examineIndexSetName);
            var highlighter = new Highlighter(formatter, scorer);

            var analyzer       = new StandardAnalyzer(Version.LUCENE_29);
            TokenStream tokens = analyzer.TokenStream(highlightField, new StringReader(plainText));

            return highlighter.GetBestFragments(tokens, plainText, maxNumFragments, "...");
        }
示例#9
0
        /// <summary>
        /// Searches the index for every keyword cut from <paramref name="kw"/> and
        /// returns the matching posts with the keyword highlighted in each field.
        /// </summary>
        /// <param name="kw">Search text; it is split into keywords by <c>CutKeywords</c>.</param>
        /// <param name="segment">Highlighter fragment size, and the preview length used when the content does not contain the keyword.</param>
        /// <returns>Posts with a non-empty title, distinct by id.</returns>
        /// <exception cref="Exception"><c>IndexPath</c> is null or empty.</exception>
        public static IEnumerable<PostOutputDto> Search(string kw, int segment = 200)
        {
            if (string.IsNullOrEmpty(IndexPath))
            {
                throw new Exception("未设置索引文件夹路径,参数名:" + IndexPath);
            }
            string indexPath = IndexPath;

            using (var analyzer = new PanGuAnalyzer())
            {
                var list   = CutKeywords(kw);
                var result = new ConcurrentQueue<PostOutputDto>();
                Parallel.ForEach(list, k =>
                {
                    // Skip keywords containing any of these sequences.
                    if (k.Contains(new[] { @"\?", @"\*", @"\+", @"\-", @"\[", @"\]", @"\{", @"\}", @"\(", @"\)", "�" }))
                    {
                        return;
                    }

                    // Directory, reader and searcher are per keyword and are now
                    // released when the keyword has been processed.
                    using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory()))
                    using (IndexReader reader = IndexReader.Open(directory, true))
                    using (var searcher = new IndexSearcher(reader))
                    {
                        // Query several fields at once.
                        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { nameof(Post.Id), nameof(Post.Title), nameof(Post.Content), nameof(Post.Author), nameof(Post.Label), nameof(Post.Email), nameof(Post.Keyword) }, analyzer);
                        Query query  = parser.Parse(k);
                        int n        = 100000;
                        TopDocs docs = searcher.Search(query, null, n);
                        if (docs == null || docs.TotalHits == 0 || docs.ScoreDocs == null)
                        {
                            return;
                        }

                        // One highlighter per keyword: it is local to this worker.
                        var simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
                        var highlighter         = new Highlighter(simpleHtmlFormatter, new Segment())
                        {
                            FragmentSize = segment
                        };
                        string needle = k.ToLower();

                        foreach (ScoreDoc sd in docs.ScoreDocs)
                        {
                            Document doc = searcher.Doc(sd.Doc);
                            int id       = doc.Get(nameof(Post.Id)).ToInt32();
                            if (result.Any(p => p.Id == id))
                            {
                                continue;
                            }

                            string title   = doc.Get(nameof(Post.Title));
                            string content = doc.Get(nameof(Post.Content));
                            string author  = doc.Get(nameof(Post.Author));
                            string label   = doc.Get(nameof(Post.Label));
                            string email   = doc.Get(nameof(Post.Email));
                            string keyword = doc.Get(nameof(Post.Keyword));

                            // Use a local for the preview length. Assigning to the
                            // captured `segment` parameter raced across workers and
                            // shrank the fragment size for every later document.
                            int previewLength = Math.Min(segment, content.Length);

                            result.Enqueue(new PostOutputDto()
                            {
                                Id      = id,
                                Title   = title.ToLower().Contains(needle) ? highlighter.GetBestFragment(k, title) : title,
                                Content = content.ToLower().Contains(needle) ? highlighter.GetBestFragment(k, content) : content.Substring(0, previewLength),
                                Author  = author.ToLower().Contains(needle) ? highlighter.GetBestFragment(k, author) : author,
                                Label   = label.ToLower().Contains(needle) ? highlighter.GetBestFragment(k, label) : label,
                                Email   = email.ToLower().Contains(needle) ? highlighter.GetBestFragment(k, email) : email,
                                Keyword = keyword.ToLower().Contains(needle) ? highlighter.GetBestFragment(k, keyword) : keyword
                            });
                        }
                    }
                });

                // Workers may enqueue the same post concurrently; de-duplicate here.
                return result.Where(p => !string.IsNullOrEmpty(p.Title)).DistinctBy(p => p.Id);
            }
        }
示例#10
0
        /// <summary>
        /// Highlights the search keywords inside a piece of text.
        /// </summary>
        /// <param name="text">The input text.</param>
        /// <param name="keys">The search keywords to highlight.</param>
        /// <param name="analyEnum">Selects which analyzer the highlighter is built with.</param>
        /// <returns>
        /// The best fragment produced by the highlighter, or an empty string when
        /// <paramref name="analyEnum"/> is not one of the handled values.
        /// </returns>
        public static string HightLightText(string text, string keys, AnalyzerEnum analyEnum)
        {
            var formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
            Highlighter highlighter;

            if (analyEnum == AnalyzerEnum.SimpleAnalyzer)
            {
                highlighter = new Highlighter(formatter, new SimpleAnalyzer());
            }
            else if (analyEnum == AnalyzerEnum.EnglishAnalyzer)
            {
                EnglishAnalyzer english = new EnglishAnalyzer();
                english.Init();
                highlighter = new Highlighter(formatter, english);
            }
            else if (analyEnum == AnalyzerEnum.PanGuSegment)
            {
                highlighter = new Highlighter(formatter, new PanGuAnalyzer());
            }
            else
            {
                // Unhandled analyzer choice: nothing to highlight.
                return string.Empty;
            }

            highlighter.FragmentSize = 500000;
            return highlighter.GetBestFragment(keys, text);
        }
 /// <summary>
 /// Initializes the helper with its built-in highlighting settings.
 /// </summary>
 private USNLuceneHelper()
 {
     var analyzer  = new StandardAnalyzer(_luceneVersion);
     // Matched terms are wrapped in a span carrying the "label label-primary" classes.
     var formatter = new SimpleHTMLFormatter("<span class=\"label label-primary\">", "</span>");

     Separator = "...";
     MaxNumHighlights = 5;
     HighlightAnalyzer = analyzer;
     HighlightFormatter = formatter;
 }
示例#12
0
        /// <summary>
        /// Returns one page of highlighted search results for <paramref name="keyword"/>;
        /// results are kept in the memory cache for one hour.
        /// </summary>
        /// <param name="page">Page number passed to the search options.</param>
        /// <param name="size">Page size passed to the search options.</param>
        /// <param name="keyword">Raw search text.</param>
        /// <returns>The matching posts with highlighted title/content, elapsed time and total hit count.</returns>
        public SearchResult<PostOutputDto> SearchPage(int page, int size, string keyword)
        {
            string cacheKey = $"search:{keyword}:{page}:{size}";
            if (_memoryCache.TryGetValue<SearchResult<PostOutputDto>>(cacheKey, out var cached))
            {
                return cached;
            }

            var searchResult = _searchEngine.ScoredSearch<Post>(new SearchOptions(keyword, page, size, typeof(Post)));
            var posts        = searchResult.Results
                .Select(hit => hit.Entity.Mapper<PostOutputDto>())
                .Where(dto => dto.Status == Status.Pended)
                .ToList();

            var formatter   = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
            var highlighter = new Highlighter(formatter, new Segment());
            highlighter.FragmentSize = 200;

            var keywords = _searcher.CutKeywords(keyword);

            // Returns the first non-empty fragment produced by a keyword that
            // occurs in the source text, or null when no keyword yields one.
            string FirstFragment(string source)
            {
                foreach (var word in keywords)
                {
                    if (!source.Contains(word))
                    {
                        continue;
                    }

                    string fragment = highlighter.GetBestFragment(word, source);
                    if (!string.IsNullOrEmpty(fragment))
                    {
                        return fragment;
                    }
                }

                return null;
            }

            foreach (var post in posts)
            {
                string titleFragment = FirstFragment(post.Title);
                if (titleFragment != null)
                {
                    post.Title = titleFragment;
                }

                string contentFragment = FirstFragment(post.Content);
                if (contentFragment != null)
                {
                    post.Content = contentFragment;
                }
                else if (post.Content.Length > 200)
                {
                    // No highlight available: fall back to the first 200 characters.
                    post.Content = post.Content.Substring(0, 200);
                }
            }

            var pageResult = new SearchResult<PostOutputDto>();
            pageResult.Results = posts;
            pageResult.Elapsed = searchResult.Elapsed;
            pageResult.Total   = searchResult.TotalHits;

            return _memoryCache.Set(cacheKey, pageResult, TimeSpan.FromHours(1));
        }
示例#13
0
        /// <summary>
        /// Gets up to three highlighted fragments of <paramref name="text"/> for a search hit.
        /// </summary>
        /// <param name="query">Query whose terms are highlighted with <c>&lt;b&gt;</c> tags.</param>
        /// <param name="text">Text the fragments are cut from.</param>
        /// <param name="analys">Analyzer passed to <c>TokenSources.GetAnyTokenStream</c>.</param>
        /// <param name="searcher">Searcher whose index reader supplies the token stream.</param>
        /// <param name="doc">The hit; its document id selects the "Contents" field to tokenize.</param>
        /// <returns>The best fragments, at most three.</returns>
        public static string[] TextHighlighter(Query query, string text, StandardAnalyzer analys, IndexSearcher searcher, ScoreDoc doc)
        {
            var scorer      = new QueryScorer(query);
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
            var tokens      = TokenSources.GetAnyTokenStream(searcher.IndexReader, doc.Doc, "Contents", analys);

            return highlighter.GetBestFragments(tokens, text, 3);
        }
示例#14
0
        /// <summary>
        /// Returns the best fragment of <paramref name="body"/> for <paramref name="keyword"/>,
        /// with matches wrapped in a red <c>font</c> tag.
        /// </summary>
        /// <param name="body">Text to extract the fragment from.</param>
        /// <param name="keyword">Keyword to highlight.</param>
        /// <returns>The fragment produced by the highlighter.</returns>
        private static string Preview(string body, string keyword)
        {
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"Red\">", "</font>"), new Segment())
            {
                FragmentSize = 120
            };

            return highlighter.GetBestFragment(keyword, body);
        }
示例#15
0
        /// <summary>
        /// Highlights the query's terms inside <paramref name="content"/>.
        /// </summary>
        /// <param name="content">Text to highlight.</param>
        /// <param name="field">Not used by this method.</param>
        /// <param name="query">Query whose terms are extracted and highlighted.</param>
        /// <returns>The best fragment produced by the highlighter.</returns>
        protected string SetHighKeyWord(string content, string field, Query query)
        {
            var formatter  = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
            var queryTerms = new HashSet<Term>();
            query.ExtractTerms(queryTerms);

            var highlighter = new Highlighter(formatter, new QueryScorer(query));

            // The tokenizer receives the text of each query term along with the content.
            var termTexts = queryTerms.Select(term => term.Text());
            var tokenizer = new JiebaMergeTokenizer(termTexts, new StringReader(content));

            return highlighter.GetBestFragment(tokenizer, content);
        }
示例#16
0
        /// <summary>
        /// Builds a highlighter for <paramref name="query"/> that wraps matches in
        /// <c>&lt;b&gt;</c> tags and uses a span fragmenter with a fragment size of 150.
        /// </summary>
        /// <param name="query">The query whose matches are highlighted.</param>
        /// <returns>The configured highlighter.</returns>
        private Highlighter CreateHighlighter(Query query)
        {
            var scorer      = new QueryScorer(query);
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);

            // The fragmenter shares the scorer used for highlighting.
            highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer, 150);
            return highlighter;
        }
示例#17
0
        /// <summary>
        /// Builds an HTML-encoded preview of <paramref name="text"/> with the matches
        /// of the request's content query wrapped in <paramref name="prefix"/> and <paramref name="suffix"/>.
        /// </summary>
        /// <param name="searchRequest">The search request; when null nothing is highlighted.</param>
        /// <param name="text">Content to preview.</param>
        /// <param name="length">Fragment size passed to the fragmenter.</param>
        /// <param name="prefix">Markup placed before each highlighted term in the output.</param>
        /// <param name="suffix">Markup placed after each highlighted term in the output.</param>
        /// <param name="returnRawContentWhenResultIsEmpty">When true, the encoded full text is returned if nothing was highlighted.</param>
        /// <returns>
        /// The preview HTML, an empty string (also when no index maintainer is found),
        /// or a fixed notice when the text exceeds the highlight limit.
        /// </returns>
        public string GenerateHtmlPreviewText(SearchRequest searchRequest, string text, int length, string prefix = "<span class='highlight'>", string suffix = "</span>", bool returnRawContentWhenResultIsEmpty = false)
        {
            if (searchRequest == null)
            {
                return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
            }

            var maintainer = GetIndexMaintainerWrapper(searchRequest.IndexPk);
            if (maintainer == null)
            {
                return string.Empty;
            }

            var contentQuery = GetContentQuery(searchRequest, maintainer);

            // A non-positive configured limit falls back to the default.
            var highlightLimit = maintainer.IndexConfig.MaxContentHighlightLength;
            if (highlightLimit <= 0)
            {
                highlightLimit = Constants.DefaultMaxContentHighlightLength;
            }

            // Oversized content is skipped for performance.
            if (text.Length > highlightLimit)
            {
                return "Content is too long to highlight";
            }

            string fragments = null;

            if (contentQuery != null)
            {
                var scorer      = new QueryScorer(contentQuery);
                var formatter   = new SimpleHTMLFormatter(HighLightPrefix, HighLightSuffix);
                var highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter       = new SimpleFragmenter(length);
                highlighter.MaxDocCharsToAnalyze = highlightLimit;

                using var tokens = GetTokenStream(text, searchRequest.CaseSensitive);

                fragments = highlighter.GetBestFragments(tokens, text, 3, "...");
            }

            if (string.IsNullOrEmpty(fragments))
            {
                return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
            }

            // Encode first, then swap the placeholder markers for the caller's markup.
            return HttpUtility.HtmlEncode(fragments)
                .Replace(HighLightPrefix, prefix)
                .Replace(HighLightSuffix, suffix);
        }
示例#18
0
        /// <summary>
        /// Highlights the search keyword inside a search result.
        /// </summary>
        /// <param name="keyword">The keyword; it is parsed into a query.</param>
        /// <param name="content">The search result text.</param>
        /// <param name="analyzer">Analyzer used for both parsing and highlighting, e.g. <c>new SimpleAnalyzer()</c>.</param>
        /// <returns>The best highlighted fragment, or <paramref name="content"/> unchanged when the highlighter returns null.</returns>
        public static string HighLight(string keyword, string content, Analyzer analyzer)
        {
            const string fieldName = "keyword";

            var parser  = new QueryParserEx(Lucene.Net.Util.Version.LUCENE_30, fieldName, analyzer);
            Query query = parser.Parse(keyword);

            var scorer      = new QueryScorer(query);
            var highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, END_TAG), scorer)
            {
                TextFragmenter = new SimpleSpanFragmenter(scorer)
            };

            string best = highlighter.GetBestFragment(analyzer, fieldName, content);
            return best ?? content;
        }
示例#19
0
        /// <summary>
        /// Highlights the search keyword inside a search result.
        /// </summary>
        /// <param name="keyword">The keyword.</param>
        /// <param name="content">The search result text.</param>
        /// <returns>The highlighted result.</returns>
        public static string HighLight(string keyword, string content)
        {
            // The formatter's arguments are the markup placed before and after each
            // highlighted word; the highlighter is built from it and a PanGu Segment.
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new Segment())
            {
                // Number of characters per summary fragment.
                FragmentSize = 100
            };

            // Best-matching summary fragment.
            string bestFragment = highlighter.GetBestFragment(keyword, content);
            return bestFragment;
        }
        /// <summary>
        /// Searches the "Title" and "Content" fields of the index under <c>/Indexs/</c>
        /// and returns the matching news with highlighted content.
        /// </summary>
        /// <param name="keywords">Search text; every term is required (AND operator).</param>
        /// <returns>Up to 1000 news items, sorted by <c>OrderId</c> descending.</returns>
        public List<News> Search(string keywords)
        {
            var indexFolder = new io.DirectoryInfo(HttpContext.Current.Server.MapPath("/Indexs/"));

            // Directory, reader and searcher were previously never released.
            using (Directory dir = FSDirectory.Open(indexFolder, new SimpleFSLockFactory()))
            using (IndexReader reader = IndexReader.Open(dir, true))
            using (IndexSearcher search = new IndexSearcher(reader))
            {
                MultiFieldQueryParser multifield = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Title", "Content" }, new PanGuAnalyzer());
                multifield.PhraseSlop      = 3;
                multifield.DefaultOperator = QueryParser.Operator.AND;
                Query muqu = multifield.Parse(keywords);

                // NOTE(review): only NewsId values 1..10 pass this filter. It looks
                // like a leftover demo restriction; confirm before relying on it.
                NumericRangeFilter<int> filter = NumericRangeFilter.NewIntRange("NewsId", 1, 10, true, true);

                Sort sort = new Sort();
                sort.SetSort(new SortField("OrderId", SortField.LONG, true));

                TopFieldDocs fields = search.Search(muqu, filter, 1000, sort);
                ScoreDoc[] docs     = fields.ScoreDocs;

                // The highlighter does not depend on the document, so build it once.
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style=\"color:red\">", "</span>");
                Highlighter         high      = new Highlighter(formatter, new PanGu.Segment());
                high.FragmentSize = 120;

                List<News> newslist = new List<News>();

                for (int i = 0; i < docs.Length; i++)
                {
                    Document doc = search.Doc(docs[i].Doc);

                    News news = new News();
                    news.NewsId  = Convert.ToInt32(doc.Get("NewsId"));
                    news.Title   = doc.Get("Title");
                    news.Content = high.GetBestFragment(keywords, doc.Get("Content"));
                    news.AddTime = Convert.ToDateTime(doc.Get("Date"));
                    news.OrderId = Convert.ToInt64(doc.Get("OrderId"));

                    newslist.Add(news);
                }

                return newslist;
            }
        }
        /// <summary>
        /// Returns one page of highlighted search results for <paramref name="keyword"/>;
        /// results are cached under a key built from the arguments and expire after one hour.
        /// </summary>
        /// <param name="page">Page number forwarded to <c>BuildSearchOptions</c>.</param>
        /// <param name="size">Page size forwarded to <c>BuildSearchOptions</c>.</param>
        /// <param name="keyword">Raw search text.</param>
        /// <returns>The published posts matching the keyword, with elapsed time and total hit count.</returns>
        public SearchResult<PostDto> SearchPage(int page, int size, string keyword)
        {
            string cacheKey = $"search:{keyword}:{page}:{size}";
            if (_cacheManager.Exists(cacheKey))
            {
                return _cacheManager.Get(cacheKey);
            }

            var searchResult = SearchEngine.ScoredSearch<Post>(BuildSearchOptions(page, size, keyword));
            var published    = searchResult.Results.Where(hit => hit.Entity.Status == Status.Published).ToList();
            var publishedIds = published.Select(hit => hit.Entity.Id).ToArray();
            var storedById   = GetQuery<PostDto>(p => publishedIds.Contains(p.Id)).ToDictionary(p => p.Id);

            var posts = published.Select(hit =>
            {
                var dto = _mapper.Map<PostDto>(hit.Entity);

                // Copy the values that come from the database query, when present.
                if (storedById.TryGetValue(dto.Id, out var stored))
                {
                    dto.CategoryName   = stored.CategoryName;
                    dto.ModifyDate     = stored.ModifyDate;
                    dto.CommentCount   = stored.CommentCount;
                    dto.TotalViewCount = stored.TotalViewCount;
                    dto.CategoryId     = stored.CategoryId;
                }

                return dto;
            }).ToList();

            var formatter   = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
            var highlighter = new Highlighter(formatter, new Segment());
            highlighter.FragmentSize = 200;

            HighlightSegment(posts, Searcher.CutKeywords(keyword), highlighter);

            var pageResult = new SearchResult<PostDto>();
            pageResult.Results = posts;
            pageResult.Elapsed = searchResult.Elapsed;
            pageResult.Total   = searchResult.TotalHits;

            _cacheManager.Add(cacheKey, pageResult);
            _cacheManager.Expire(cacheKey, TimeSpan.FromHours(1));
            return pageResult;
        }
示例#22
0
        /// <summary>
        /// Returns the best-matching fragment of <paramref name="Content"/> with the
        /// keywords wrapped in a red <c>font</c> tag.
        /// </summary>
        /// <param name="keywords">Keywords to highlight.</param>
        /// <param name="Content">Text to extract the fragment from.</param>
        /// <returns>The fragment produced by the highlighter.</returns>
        public static string CreateHightLight(string keywords, string Content)
        {
            // The formatter's arguments are the markup placed before and after each highlighted word.
            var formatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

            // The highlighter is built from the formatter and a PanGu Segment.
            var highlighter = new Highlighter(formatter, new Segment());

            // Number of characters per summary fragment.
            highlighter.FragmentSize = 150;

            string bestFragment = highlighter.GetBestFragment(keywords, Content);
            return bestFragment;
        }
示例#23
0
        /// <summary>
        /// Replaces <c>Value</c> with its highlighted form when the highlighter
        /// produces a non-empty fragment.
        /// </summary>
        /// <param name="keywords">Keywords to highlight.</param>
        /// <param name="matchField">Field settings: keyword prefix/suffix markup and fragment size.</param>
        public void SetHighLight(string keywords, LnMatchField matchField)
        {
            var formatter   = new SimpleHTMLFormatter(matchField.KeywordPrefix, matchField.KeywordSuffix);
            var highlighter = new Highlighter(formatter, new Segment());

            // A fragment size below 1 means the whole value is used as one fragment.
            if (matchField.FragmentSize < 1)
            {
                highlighter.FragmentSize = this.Value.Length;
            }
            else
            {
                highlighter.FragmentSize = matchField.FragmentSize;
            }

            string highlighted = highlighter.GetBestFragment(keywords, this.Value);
            if (!string.IsNullOrEmpty(highlighted))
            {
                this.Value = highlighted;
            }
        }
示例#24
0
        /// <summary>
        /// Highlights search keywords inside the <c>p</c> elements of a post's HTML content.
        /// </summary>
        /// <remarks>
        /// Best effort: any exception is written to the console and the post is left
        /// as it was at the point of failure.
        /// </remarks>
        /// <param name="p">Post whose <c>Content</c> is parsed and then overwritten with the processed markup.</param>
        /// <param name="keyword">Search phrase; bracket-like characters are replaced by spaces before it is cut into keywords.</param>
        public async Task Highlight(Post p, string keyword)
        {
            try
            {
                var formatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
                var highlighter = new Highlighter(formatter, new Segment());
                highlighter.FragmentSize = int.MaxValue;

                var sanitized = Regex.Replace(keyword, @"<|>|\(|\)|\{|\}|\[|\]", " ");
                var keywords  = Searcher.CutKeywords(sanitized);
                var context   = BrowsingContext.New(Configuration.Default);
                var document  = await context.OpenAsync(req => req.Content(p.Content));

                foreach (var paragraph in document.DocumentElement.GetElementsByTagName("p"))
                {
                    for (var i = 0; i < paragraph.ChildNodes.Length; i++)
                    {
                        var child = paragraph.ChildNodes[i];

                        // Each child node is rewritten at most once: stop at the first
                        // keyword that yields a fragment for a supported node type.
                        foreach (var word in keywords)
                        {
                            var text = child.TextContent;
                            if (!text.Contains(word, StringComparison.CurrentCultureIgnoreCase))
                            {
                                continue;
                            }

                            var fragment = highlighter.GetBestFragment(word, text);
                            if (string.IsNullOrEmpty(fragment))
                            {
                                continue;
                            }

                            if (child is IElement element)
                            {
                                element.InnerHtml = fragment;
                                break;
                            }

                            if (child is IText textNode)
                            {
                                // Text nodes cannot hold markup, so the fragment is parsed
                                // and the resulting body element takes the node's place.
                                var parsedBody = new HtmlParser().ParseDocument(fragment).Body;
                                paragraph.ReplaceChild(parsedBody, textNode);
                                break;
                            }

                            // Any other node type is left alone; the next keyword is tried.
                        }
                    }
                }

                p.Content = document.Body.InnerHtml;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
        }
示例#25
0
        /// <summary>
        /// Builds a highlighted preview of <paramref name="text"/> for the query
        /// <paramref name="q"/>: up to 3 fragments joined by "...", with matches
        /// wrapped in a <c>span</c> of class <c>highlight</c>.
        /// </summary>
        /// <param name="q">Query whose terms are scored against the <c>Body</c> field.</param>
        /// <param name="text">Text to tokenize and highlight.</param>
        /// <returns>The string returned by the highlighter's <c>GetBestFragments</c>.</returns>
        private string HighlightContents(Query q, string text)
        {
            const int fragmentChars = 150;

            var       bodyScorer = new QueryScorer(q, Body);
            Formatter spanMarkup = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");

            var highlighter = new Highlighter(spanMarkup, bodyScorer);
            highlighter.SetTextFragmenter(new SimpleFragmenter(fragmentChars));

            var         analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            TokenStream tokens   = analyzer.TokenStream(Body, new StringReader(text));

            return highlighter.GetBestFragments(tokens, text, 3, "...");
        }
示例#26
0
        /// <summary>
        /// Produces highlighted fragments of a field value for the current <c>Query</c>.
        /// </summary>
        /// <param name="fieldName">Field name; selects the analyzer via <c>GetAnalyzer</c> and labels the token stream.</param>
        /// <param name="fieldValue">Text of the field to highlight.</param>
        /// <param name="startTag">Markup placed before each highlighted term.</param>
        /// <param name="endTag">Markup placed after each highlighted term.</param>
        /// <param name="fragmentLength">Size passed to the <c>SimpleFragmenter</c>.</param>
        /// <param name="numberOfFragments">Maximum number of fragments requested.</param>
        /// <returns>The fragments returned by the highlighter.</returns>
        protected string[] HighlightField(string fieldName, string fieldValue, string startTag = "<strong>", string endTag = "</strong>", int fragmentLength = 150, int numberOfFragments = 1)
        {
            var        queryScorer = new Lucene.Net.Search.Highlight.QueryScorer(Query);
            IFormatter tagWrapper  = new SimpleHTMLFormatter(startTag, endTag);

            var fragmentHighlighter = new Highlighter(tagWrapper, queryScorer);
            fragmentHighlighter.TextFragmenter = new SimpleFragmenter(fragmentLength);

            var         valueReader   = new StringReader(fieldValue);
            var         fieldAnalyzer = GetAnalyzer(fieldName);
            TokenStream tokens        = fieldAnalyzer.TokenStream(fieldName, valueReader);

            return fragmentHighlighter.GetBestFragments(tokens, fieldValue, numberOfFragments);
        }
示例#27
0
        /// <summary>
        /// Parses <paramref name="query"/> over the given fields, runs the search and
        /// builds result items with a highlighted preview of each hit's content.
        /// </summary>
        /// <param name="query">Query text handed to the multi-field query parser.</param>
        /// <param name="fields">Index fields the query is parsed against.</param>
        /// <param name="topResultCount">Maximum number of hits requested from the searcher.</param>
        /// <returns>
        /// <c>SearchResult.Empty</c> when nothing matches; otherwise a result carrying
        /// the total hit count and one item per returned hit.
        /// </returns>
        private SearchResult SearchCore(string query, string[] fields, int topResultCount)
        {
            // Name of the stored field the preview is generated from.
            const string contentField = "content";

            // Search
            var parser = new MultiFieldQueryParser(Config.LuceneVersion, fields, Analyzer);

            parser.AllowLeadingWildcard = true;
            parser.DefaultOperator      = Operator.AND;
            parser.Locale               = Config.Locale;
            parser.AnalyzeRangeTerms    = true;

            var q = parser.Parse(query);

            var results = Searcher.Search(q, topResultCount);

            if (results.TotalHits == 0)
            {
                return(SearchResult.Empty);
            }

            var hits = results.ScoreDocs;

            // Format
            var items = new List <SearchResultItem>(hits.Length);

            var scorer      = new QueryScorer(q);
            var formatter   = new SimpleHTMLFormatter("<mark>", "</mark>");
            var highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = new SimpleFragmenter(Config.FragmentLength)
            };

            foreach (var hit in hits)
            {
                var doc   = Searcher.Doc(hit.Doc);
                var url   = doc.Get("url");
                var title = doc.Get("title");

                // A document without a stored content field yields null; treat it as
                // empty text so StringReader does not throw.
                var content = doc.Get(contentField) ?? string.Empty;

                // The first argument is the FIELD NAME used for analysis. The original
                // passed the document's url value here, which is not a field name.
                using (var stream = Analyzer.GetTokenStream(contentField, new StringReader(content)))
                {
                    var preview = highlighter.GetBestFragments(stream, content, Config.ResultFragments, Config.FragmentSeparator);

                    var item = new SearchResultItem(url, ToWbrWrapName(title), preview);
                    items.Add(item);
                }
            }

            return(new SearchResult(results.TotalHits, items));
        }
示例#28
0
        /// <summary>
        /// Highlights the keyword inside a search result.
        /// </summary>
        /// <remarks>Requires a reference to PanGu.HighLight.dll.</remarks>
        /// <param name="keyword">Keyword to highlight.</param>
        /// <param name="content">Search result text.</param>
        /// <returns>The highlighted result produced by the highlighter.</returns>
        public static string HighLight(string keyword, string content)
        {
            // The formatter arguments are the prefix and suffix placed around each highlighted word.
            var boldRedFormatter = new SimpleHTMLFormatter(
                "<font style=\"font-style:normal;font-weight:bold;color:#cc0000;\"><b>",
                "</b></font>");

            // The highlighter is built from the formatter and a PanGu Segment instance.
            var resultHighlighter = new Highlighter(boldRedFormatter, new Segment())
            {
                // Number of characters per summary fragment.
                FragmentSize = 1000
            };

            // Pick the fragment that matches the keyword best.
            return resultHighlighter.GetBestFragment(keyword, content);
        }
示例#29
0
        /// <summary>
        /// Generates a short plain-text preview of <paramref name="text"/> for the query
        /// <paramref name="q"/>: up to 2 best-matching fragments joined by "...".
        /// When the highlighter yields nothing, the preview is the beginning of the text.
        /// </summary>
        /// <param name="q">Query used to score fragments.</param>
        /// <param name="text">Text to take the preview from.</param>
        /// <returns>
        /// The joined fragments, or at most the first 100 characters of
        /// <paramref name="text"/>; an empty string when the text is null or empty.
        /// </returns>
        public string GeneratePreviewText(Lucene.Net.Search.Query q, string text)
        {
            // Used both as the fragmenter size and as the fallback preview length.
            const int previewLength = 100;

            // Nothing to preview. The original threw here: StringReader rejects null,
            // and Substring(0, 100) is out of range for an empty string.
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            QueryScorer scorer      = new QueryScorer(q);
            // Empty pre/post tags: the fragments are returned without any markup.
            IFormatter  formatter   = new SimpleHTMLFormatter("", "");
            Highlighter highlighter = new Highlighter(formatter, scorer);

            highlighter.TextFragmenter = new SimpleFragmenter(previewLength);
            TokenStream stream   = new SimpleAnalyzer().TokenStream(TEXT_FN, new StringReader(text));
            string      fragment = highlighter.GetBestFragments(stream, text, 2, "...");

            if (string.IsNullOrEmpty(fragment))
            {
                // Clamp to the text length: Substring(0, 100) throws
                // ArgumentOutOfRangeException for texts shorter than 100 characters.
                fragment = text.Length > previewLength ? text.Substring(0, previewLength) : text;
            }
            return(fragment);
        }
示例#30
0
        /// <summary>
        /// Converts search hits into <c>Book</c> items whose Author, Name, FileName and
        /// Content are highlighted for <paramref name="query"/>, falling back to the
        /// stored value when a field produces no highlighted fragment.
        /// </summary>
        /// <param name="searcher">Searcher used to load each hit's stored document.</param>
        /// <param name="docs">Hits to convert.</param>
        /// <param name="query">Query whose terms are highlighted.</param>
        /// <returns>An anonymous object with <c>Items</c> (the books) and <c>TotalCount</c> (<c>docs.TotalHits</c>).</returns>
        private object GetResultData(IndexSearcher searcher, TopDocs docs, Query query)
        {
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
            Highlighter         highlighter         = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));

            highlighter.TextFragmenter = new SimpleFragmenter(150);

            var result = new List <Book>();

            // The analyzer is created per call, so it is disposed here once every
            // fragment has been materialized as a string.
            using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search))
            {
                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    Document doc = searcher.Doc(sd.Doc);

                    var author   = HighlightOrOriginal(highlighter, analyzer, doc, "Author");
                    var name     = HighlightOrOriginal(highlighter, analyzer, doc, "Name");
                    var fileName = HighlightOrOriginal(highlighter, analyzer, doc, "FileName");
                    var content  = HighlightOrOriginal(highlighter, analyzer, doc, "Content");

                    result.Add(new Book()
                    {
                        Id       = doc.Get("Id"),
                        Author   = author,
                        Name     = name,
                        FileName = fileName,
                        Content  = content
                    });
                }
            }

            return(new { Items = result, TotalCount = docs.TotalHits });
        }

        /// <summary>
        /// Returns the highlighted fragment of a stored field, or the stored value
        /// itself when the field is missing/empty or nothing was highlighted.
        /// </summary>
        private static string HighlightOrOriginal(Highlighter highlighter, Analyzer analyzer, Document doc, string fieldName)
        {
            string original = doc.Get(fieldName);

            // A field that is not stored comes back as null; do not hand that to the highlighter.
            if (string.IsNullOrEmpty(original))
            {
                return original;
            }

            string fragment = highlighter.GetBestFragment(analyzer, fieldName, original);
            return string.IsNullOrWhiteSpace(fragment) ? original : fragment;
        }
示例#31
0
文件: Index.cs 项目: qiuliang/tumumi
        /// <summary>
        /// Converts the rows of the first table in <paramref name="ds"/> into
        /// <c>DDocInfo</c> objects, optionally highlighting title and description.
        /// </summary>
        /// <param name="ds">Data set whose first table supplies the Title, Description, DocType, DocumentId, UserId, CreateTime, UpCount and ViewCount columns.</param>
        /// <param name="keyWords">Keywords to highlight.</param>
        /// <param name="isHighlight">When true, fills <c>SearchSummary</c> from the description and replaces the title with its highlighted form if one is produced.</param>
        /// <returns>An <c>ArrayList</c> holding one <c>DDocInfo</c> per row.</returns>
        public ArrayList DataToList(DataSet ds,string keyWords,bool isHighlight)
        {
            ArrayList docs = new ArrayList();

            foreach (System.Data.DataRow row in ds.Tables[0].Rows)
            {
                Model.DDocInfo doc = new TMM.Model.DDocInfo
                {
                    Title = row["Title"].ToString(),
                    Description = row["Description"].ToString(),
                    DocType = row["DocType"].ToString(),
                    DocId = int.Parse(row["DocumentId"].ToString()),
                    UserId = int.Parse(row["UserId"].ToString()),
                    CreateTime = DateTime.Parse(row["CreateTime"].ToString()),
                    UpCount = int.Parse(row["UpCount"].ToString()),
                    ViewCount = int.Parse(row["ViewCount"].ToString())
                };

                if (!isHighlight)
                {
                    docs.Add(doc);
                    continue;
                }

                SimpleHTMLFormatter redFont = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

                // Title and description each get a highlighter matching their configured analyzer.
                Highlighter forTitle = CreateHighlighterFor(titleAnalyzerName, redFont);
                Highlighter forDescription = CreateHighlighterFor(descAnalyzerName, redFont);

                forTitle.FragmentSize = 50;
                forDescription.FragmentSize = 50;

                doc.SearchSummary = forDescription.GetBestFragment(keyWords, doc.Description);

                // Keep the plain title when nothing in it was highlighted.
                string highlightedTitle = forTitle.GetBestFragment(keyWords, doc.Title);
                if (!string.IsNullOrEmpty(highlightedTitle))
                {
                    doc.Title = highlightedTitle;
                }

                docs.Add(doc);
            }

            return docs;
        }

        /// <summary>
        /// Creates a highlighter backed by the analyzer that matches
        /// <paramref name="analyzerName"/>: "PanGuSegment", "EnglishAnalyzer", or the
        /// simple analyzer for any other name (compared case-insensitively).
        /// </summary>
        private static Highlighter CreateHighlighterFor(string analyzerName, SimpleHTMLFormatter formatter)
        {
            if (analyzerName.Equals("PanGuSegment", StringComparison.CurrentCultureIgnoreCase))
            {
                return new Highlighter(formatter, new PanGuAnalyzer());
            }

            if (analyzerName.Equals("EnglishAnalyzer", StringComparison.CurrentCultureIgnoreCase))
            {
                return new Highlighter(formatter, new Hubble.Core.Analysis.EnglishAnalyzer());
            }

            return new Highlighter(formatter, new Hubble.Core.Analysis.SimpleAnalyzer());
        }
示例#32
0
        /// <summary>
        /// Checks that the highlighted output stays shorter than
        /// <c>MaxDocCharsToAnalyze</c> for a very long input, both before and after a
        /// second matching token is appended beyond the analyzed range.
        /// </summary>
        public void TestMaxSizeHighlightTruncates()
        {
            var runner = new TestHighlightRunner();

            runner.TestAction = () =>
            {
                const string goodWord = "goodtoken";
                var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] { "stoppedtoken" });

                var query = new TermQuery(new Term("data", goodWord));

                // One matching token followed by a long run of the single stop word.
                var text = new StringBuilder();
                text.Append(goodWord);
                for (var repeat = 0; repeat < 10000; repeat++)
                {
                    text.Append(" ").Append(stopWords.First());
                }

                var formatter = new SimpleHTMLFormatter();
                var initialTokens = new StandardAnalyzer(TEST_VERSION, stopWords)
                    .TokenStream("data", new StringReader(text.ToString()));
                Highlighter highlighter = runner.GetHighlighter(query, "data", initialTokens, formatter);

                highlighter.TextFragmenter = new NullFragmenter();
                highlighter.MaxDocCharsToAnalyze = 100;

                string firstMatch = highlighter.GetBestFragment(
                    new StandardAnalyzer(TEST_VERSION, stopWords), "data", text.ToString());
                Assert.IsTrue(firstMatch.Length < highlighter.MaxDocCharsToAnalyze,
                              "Matched text should be no more than 100 chars in length ");

                // Add another tokenized word to the overall length, but place it way
                // beyond the length of text under consideration (after a large slug
                // of stop words + whitespace).
                text.Append(" ").Append(goodWord);

                string secondMatch = highlighter.GetBestFragment(
                    new StandardAnalyzer(TEST_VERSION, stopWords), "data", text.ToString());
                Assert.IsTrue(secondMatch.Length < highlighter.MaxDocCharsToAnalyze,
                              "Matched text should be no more than 100 chars in length ");
            };

            runner.Start();
        }
示例#33
0
        /// <summary>
        /// Checks that, with <c>MaxDocCharsToAnalyze</c> set to 36, the highlighted
        /// output still ends with the text that follows the matched term ("in it").
        /// </summary>
        public void TestMaxSizeEndHighlight()
        {
            var runner = new TestHighlightRunner();

            runner.TestAction = () =>
            {
                const string sample = "this is a text with searchterm in it";

                var stopWords = Support.Compatibility.SetFactory.CreateHashSet(new[] {"in", "it"});
                var query     = new TermQuery(new Term("text", "searchterm"));

                var formatter = new SimpleHTMLFormatter();
                var tokens    = new StandardAnalyzer(TEST_VERSION, stopWords)
                    .TokenStream("text", new StringReader(sample));
                Highlighter highlighter = runner.GetHighlighter(query, "text", tokens, formatter);

                highlighter.TextFragmenter = new NullFragmenter();
                highlighter.MaxDocCharsToAnalyze = 36;

                string highlighted = highlighter.GetBestFragment(
                    new StandardAnalyzer(TEST_VERSION, stopWords), "text", sample);

                Assert.IsTrue(highlighted.EndsWith("in it"),
                              "Matched text should contain remainder of text after highlighted query ");
            };

            runner.Start();
        }
示例#34
0
        /// <summary>
        /// Helper intended for use with <c>testHighlightingWithDefaultField()</c>:
        /// highlights <paramref name="text"/> for <paramref name="query"/> and returns
        /// the original text when no fragment is produced.
        /// </summary>
        /// <param name="query">Query to score the text against.</param>
        /// <param name="fieldName">Field name used for tokenizing and passed to the scorer.</param>
        /// <param name="text">Text to highlight.</param>
        /// <returns>The best fragment, or <paramref name="text"/> when that fragment is empty.</returns>
        private static String HighlightField(Query query, String fieldName, String text)
        {
            var         analyzer = new StandardAnalyzer(TEST_VERSION);
            TokenStream tokens   = analyzer.TokenStream(fieldName, new StringReader(text));

            // Assuming "<B>", "</B>" used to highlight
            var highlighter = new Highlighter(
                new SimpleHTMLFormatter(),
                new QueryScorer(query, fieldName, FIELD_NAME));
            highlighter.TextFragmenter = new SimpleFragmenter(int.MaxValue);

            var best = highlighter.GetBestFragments(tokens, text, 1, "(FIELD TEXT TRUNCATED)");
            if (best.Length == 0)
            {
                return text;
            }

            return best;
        }