示例#1
0
        /// <summary>
        /// Loads the page at <paramref name="newsUrl"/> and returns the inner HTML of the
        /// first <c>div</c> whose id is <c>article_body</c>, passed through
        /// <c>HtmlCleaner.CleanHtml</c>.
        /// </summary>
        /// <param name="newsUrl">Address of the article page to load.</param>
        /// <returns>
        /// The result of <c>HtmlCleaner.CleanHtml</c>. When no matching <c>div</c> exists,
        /// <c>null</c> is handed to the cleaner and its result is returned unchanged.
        /// </returns>
        /// <remarks>
        /// Exceptions from loading or parsing propagate to the caller untouched, so the
        /// original stack trace is preserved (the former <c>throw ex;</c> discarded it).
        /// </remarks>
        public string TutByParseNews(string newsUrl)
        {
            var web      = new HtmlWeb();
            var document = web.Load(newsUrl);

            // Null when the page has no <div id="article_body">.
            var articleHtml = document.DocumentNode
                                      .Descendants("div")
                                      .FirstOrDefault(d => d.Id == "article_body")?
                                      .InnerHtml;

            return HtmlCleaner.CleanHtml(articleHtml);
        }
示例#2
0
 /// <summary>
 /// Loads the page at <paramref name="newsUrl"/> and returns the inner HTML of the
 /// first <c>div</c> whose <c>class</c> attribute equals <c>news-text</c>, passed
 /// through <c>HtmlCleaner.CleanHtml</c>.
 /// </summary>
 /// <param name="newsUrl">Address of the article page to load.</param>
 /// <returns>
 /// The result of <c>HtmlCleaner.CleanHtml</c>. When no matching <c>div</c> exists,
 /// <c>null</c> is handed to the cleaner and its result is returned unchanged.
 /// </returns>
 /// <remarks>
 /// Exceptions from loading or parsing propagate to the caller untouched, so the
 /// original stack trace is preserved (the former <c>throw ex;</c> discarded it).
 /// </remarks>
 public string OnlinerParseNews(string newsUrl)
 {
     var web      = new HtmlWeb();
     var document = web.Load(newsUrl);

     // Look the attribute up by name instead of inspecting only the first attribute:
     // a <div> with no attributes used to raise NullReferenceException, and a div whose
     // "class" was not its first attribute was never matched.
     var articleHtml = document.DocumentNode
                               .Descendants("div")
                               .FirstOrDefault(d => d.GetAttributeValue("class", string.Empty) == "news-text")?
                               .InnerHtml;

     return HtmlCleaner.CleanHtml(articleHtml);
 }
        /// <summary>
        /// Walks every news item returned by the repository and, for items that do not yet
        /// have a body, fetches it with the parser matching the item's source site, then
        /// stores the cleaned HTML and its plain-text form and saves the changes.
        /// </summary>
        /// <returns>A task that completes when all items have been processed.</returns>
        /// <remarks>
        /// A parser failure for one item is logged and that item is skipped; it does not
        /// abort the run. Exceptions from the repository or from saving propagate to the
        /// caller with their original stack trace.
        /// </remarks>
        public async Task LoadAllNewsBody()
        {
            foreach (News news in await _unitOfWork.NewsRepository.GetAllAsync())
            {
                // Skip missing entries and entries whose body is already loaded.
                // (The former "&&" dereferenced a null item and never skipped anything else.)
                if (news == null || news.Body != null)
                {
                    continue;
                }

                // A missing source matches neither site and falls through to the
                // "couldn't get body" log below instead of throwing.
                string source = news.Source ?? string.Empty;

                try
                {
                    if (source.Contains("tut.by"))            // Check origin site
                    {
                        news.Body = _newsParser.TutByParseNews(news.Source);
                    }
                    if (source.Contains("onliner.by"))
                    {
                        news.Body = _newsParser.OnlinerParseNews(news.Source);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: one unreachable or malformed page must not stop the
                    // rest, but the failure is recorded rather than silently dropped.
                    Log.Information($"{DateTime.Now}|Error|Couldnt parse {news.Source}: {ex.Message}");
                    continue;
                }

                if (news.Body != null)
                {
                    news.Body      = HtmlCleaner.CleanHtml(news.Body);
                    news.PlainText = HtmlCleaner.GetPlainText(news.Body);
                    await _unitOfWork.SaveDBAsync();
                }
                else
                {
                    Log.Information($"{DateTime.Now}|Info|Couldnt get body of {news.Source}");
                }
            }
        }