예제 #1
0
        /// <summary>
        /// Splits a search keyword into a list of candidate search terms (word segmentation).
        /// </summary>
        /// <remarks>
        /// Terms are gathered from four sources: the keyword itself, a regex-based split
        /// (every match and every capture group), the remote keyword endpoint
        /// <c>/api/customsearch/keywords</c> (only for keywords of six or more characters,
        /// best effort), and <c>JiebaSegmenter.CutForSearch</c>. Terms shorter than two
        /// characters or starting with a punctuation character are discarded. The result is
        /// stored in the memory cache under the keyword for one hour.
        /// </remarks>
        /// <param name="keyword">The raw search text to segment.</param>
        /// <returns>The distinct terms, ordered from longest to shortest.</returns>
        public List<string> CutKeywords(string keyword)
        {
            if (_memoryCache.TryGetValue(keyword, out List<string> list))
            {
                return list;
            }

            var set = new HashSet<string>
            {
                keyword
            };

            // Pattern: optional upper-case letters, lower-case letters and digits, followed by
            // runs of CJK ideographs (U+4E00-U+9FA5), followed by any non-punctuation characters.
            var mc = Regex.Matches(keyword, @"(([A-Z]*[a-z]*)[\d]*)([\u4E00-\u9FA5]+)*((?!\p{P}).)*");
            foreach (Match m in mc)
            {
                // Groups[0] is the whole match, so iterating the groups also adds m.Value.
                foreach (Group g in m.Groups)
                {
                    set.Add(g.Value);
                }
            }

            if (keyword.Length >= 6)
            {
                try
                {
                    // Escape the keyword so characters such as '&', '#', '+' or spaces cannot
                    // truncate or alter the query string. The method is synchronous, so the
                    // asynchronous calls are blocked on with .Result.
                    using (var res = HttpClient.GetAsync($"/api/customsearch/keywords?title={Uri.EscapeDataString(keyword)}").Result)
                    {
                        if (res.StatusCode == HttpStatusCode.OK)
                        {
                            var model = JsonConvert.DeserializeObject<BaiduAnalysisModel>(res.Content.ReadAsStringAsync().Result);
                            var remoteTerms = model?.Result?.Res?.KeywordList;
                            if (remoteTerms != null)
                            {
                                foreach (var term in remoteTerms)
                                {
                                    // A null entry would make the length filter below throw.
                                    if (!string.IsNullOrEmpty(term))
                                    {
                                        set.Add(term);
                                    }
                                }
                            }
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberately best effort: the remote terms only enrich the result, so a
                    // network or deserialization failure must not fail the local segmentation.
                }
            }

            set.UnionWith(new JiebaSegmenter().CutForSearch(keyword));

            // Drop terms that are too short to be useful or that begin with punctuation.
            set.RemoveWhere(s => s.Length < 2 || Regex.IsMatch(s, @"^\p{P}.*"));
            list = set.OrderByDescending(s => s.Length).ToList();
            _memoryCache.Set(keyword, list, TimeSpan.FromHours(1));
            return list;
        }
예제 #2
0
        /// <summary>
        /// Shared HTTP client for the keyword endpoint at zhannei.baidu.com. A single instance
        /// is reused because creating and disposing an <see cref="HttpClient"/> per call leaves
        /// sockets lingering and can exhaust them under load.
        /// </summary>
        private static readonly HttpClient BaiduKeywordClient = new HttpClient
        {
            BaseAddress = new Uri("http://zhannei.baidu.com")
        };

        /// <summary>
        /// Splits a search keyword into a list of candidate search terms (word segmentation).
        /// </summary>
        /// <remarks>
        /// Terms are gathered from the keyword itself, a regex-based split (every match and
        /// every capture group) and, for keywords of six or more characters, the remote
        /// endpoint <c>/api/customsearch/keywords</c> (best effort). Terms shorter than two
        /// characters or starting with a punctuation character are discarded.
        /// </remarks>
        /// <param name="keyword">The raw search text to segment.</param>
        /// <returns>The distinct terms, ordered from longest to shortest.</returns>
        public static List<string> CutKeywords(string keyword)
        {
            var terms = new HashSet<string>
            {
                keyword
            };

            // Pattern: optional upper-case letters, lower-case letters and digits, followed by
            // runs of CJK ideographs (U+4E00-U+9FA5), followed by any non-punctuation characters.
            var mc = Regex.Matches(keyword, @"(([A-Z]*[a-z]*)[\d]*)([\u4E00-\u9FA5]+)*((?!\p{P}).)*");
            foreach (Match m in mc)
            {
                // Groups[0] is the whole match, so iterating the groups also adds m.Value.
                foreach (Group g in m.Groups)
                {
                    terms.Add(g.Value);
                }
            }

            if (keyword.Length >= 6)
            {
                try
                {
                    // Escape the keyword so characters such as '&', '#', '+' or spaces cannot
                    // truncate or alter the query string. The method is synchronous, so the
                    // asynchronous calls are blocked on with .Result.
                    using (var res = BaiduKeywordClient.GetAsync($"/api/customsearch/keywords?title={Uri.EscapeDataString(keyword)}").Result)
                    {
                        if (res.StatusCode == HttpStatusCode.OK)
                        {
                            var model = JsonConvert.DeserializeObject<BaiduAnalysisModel>(res.Content.ReadAsStringAsync().Result, new JsonSerializerSettings()
                            {
                                NullValueHandling = NullValueHandling.Ignore
                            });
                            var remoteTerms = model?.Result?.Res?.KeywordList;
                            if (remoteTerms != null)
                            {
                                foreach (var term in remoteTerms)
                                {
                                    // A null entry would make the length filter below throw.
                                    if (!string.IsNullOrEmpty(term))
                                    {
                                        terms.Add(term);
                                    }
                                }
                            }
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberately best effort: the remote terms only enrich the result, so a
                    // network or deserialization failure must not fail the local segmentation.
                }
            }

            // Drop terms that are too short to be useful or that begin with punctuation.
            terms.RemoveWhere(s => s.Length < 2 || Regex.IsMatch(s, @"^\p{P}.*"));
            return terms.OrderByDescending(s => s.Length).ToList();
        }