/// <summary>
/// Sends the aggregated article file of <paramref name="source"/> for the day of
/// <c>source.RecordTime</c> by mail: the file content is used both as the HTML
/// body (one line per <c>&lt;br/&gt;</c>) and as a text attachment.
/// </summary>
/// <param name="source">Blog source whose <c>FileName</c>, <c>Path</c>, <c>Name</c> and <c>RecordTime</c> select the file and the subject.</param>
/// <remarks>
/// If the data file does not exist, an error is logged and nothing is sent.
/// </remarks>
static void SendMail(BlogSource source)
{
    string blogFileName = $"{source.FileName}-{source.RecordTime:yyyy-MM-dd}.txt";
    string blogFilePath = Path.Combine(_baseDataPath, source.Path, blogFileName);
    if (!File.Exists(blogFilePath))
    {
        _sendLogger.Error("未发现文件记录,无法发送邮件,所需文件名:" + blogFileName);
        return;
    }

    // Mail body. A StringBuilder avoids re-copying the whole body for every appended line.
    var mailContent = new StringBuilder();

    // Optional HTML header, followed by two line breaks.
    var headerFilePath = Path.Combine(_baseDir, "Config", "MailHeader.html");
    if (File.Exists(headerFilePath))
    {
        mailContent.Append(File.ReadAllText(headerFilePath));
        mailContent.Append("<br/>").Append("<br/>");
    }

    // NOTE(review): file lines are inserted into the HTML body without escaping — confirm this is intended.
    // The using blocks guarantee the file handle is released even if reading throws.
    using (FileStream mailFs = new FileStream(blogFilePath, FileMode.Open, FileAccess.Read))
    using (StreamReader sr = new StreamReader(mailFs, Encoding.UTF8))
    {
        while (!sr.EndOfStream)
        {
            mailContent.Append(sr.ReadLine()).Append("<br/>");
        }
    }

    // Attachment content: the raw file text, re-encoded as UTF-8 bytes.
    string blogFileContent = File.ReadAllText(blogFilePath);

    // Send the mail.
    MailUtil.SendMail(_mailConfig,
        _mailConfig.ReceiveList,
        "王薇",
        $"{source.Name}首页文章聚合-{source.RecordTime:yyyy-MM-dd}",
        mailContent.ToString(),
        Encoding.UTF8.GetBytes(blogFileContent),
        blogFileName);
    _sendLogger.Info($"{blogFileName},文件已发送");
}
/// <summary>
/// Fills <c>BlogSourceList</c>. The JSON cache file <c>cache.tmp</c> under
/// <c>_baseDataPath</c> is tried first; if it yields no entries, the two
/// built-in default sources are added instead.
/// </summary>
/// <remarks>
/// A cache file that cannot be read or deserialized is logged and deleted.
/// </remarks>
private static void LoadData()
{
    var cacheFilePath = Path.Combine(_baseDataPath, "cache.tmp");
    if (File.Exists(cacheFilePath))
    {
        try
        {
            var data = File.ReadAllText(cacheFilePath);
            var res = JsonConvert.DeserializeObject<List<BlogSource>>(data);
            if (res != null && res.Count > 0)
            {
                BlogSourceList.AddRange(res);
            }
        }
        catch (Exception e)
        {
            _logger.Error("缓存数据加载失败,本次将弃用!详情:" + e.Message);
            File.Delete(cacheFilePath);
        }
    }

    // Something was loaded from the cache: keep it and skip the defaults.
    if (BlogSourceList.Count > 0)
    {
        return;
    }

    // Read the clock once so year/month/day cannot come from different days
    // when this runs across midnight. new DateTime(...) is kept (rather than
    // DateTime.Today) so the resulting Kind stays Unspecified as before.
    DateTime now = DateTime.Now;

    // cnblogs front page; record time is today 09:00.
    var source1 = new BlogSource()
    {
        Name = "博客园",
        BlogDataUrl = "https://www.cnblogs.com/",
        FileName = "cnblogs",
        Path = "Blogs",
        DicXPath = new Dictionary<string, string>
        {
            ["item"] = "//div[@class='post_item_body']",
            ["title"] = "h3/a",
            ["summary"] = "p[@class='post_item_summary']",
            ["foot"] = "div[@class='post_item_foot']",
            ["author"] = "a"
        },
        RecordTime = new DateTime(now.Year, now.Month, now.Day, 9, 0, 0),
        PreviousBlogs = new List<Blog>()
    };
    BlogSourceList.Add(source1);

    // woshipm (product manager site); record time is today 17:00.
    // This source additionally defines a "date" XPath.
    var source2 = new BlogSource()
    {
        Name = "人人都是产品经理",
        BlogDataUrl = "http://www.woshipm.com/",
        FileName = "woshipm",
        Path = "WoshiPm",
        DicXPath = new Dictionary<string, string>
        {
            ["item"] = "//div[@class='postlist-item u-clearfix']",
            ["title"] = "div/h2/a",
            ["summary"] = "div/p[@class='des']",
            ["foot"] = "div/div[@class='stream-list-meta']",
            ["author"] = "span[@class='author']/a",
            ["date"] = "time"
        },
        RecordTime = new DateTime(now.Year, now.Month, now.Day, 17, 0, 0),
        PreviousBlogs = new List<Blog>()
    };
    BlogSourceList.Add(source2);
}
/// <summary>
/// Scrapes the front page of one blog source, appends the articles not seen in
/// the previous run to today's data file, and sends the aggregate mail once at
/// least 24 hours have passed since <c>source.RecordTime</c>.
/// </summary>
/// <param name="source">Source describing the URL, the XPath expressions and the output location; its <c>PreviousBlogs</c> and <c>RecordTime</c> are updated.</param>
/// <exception cref="InvalidOperationException">The "item" XPath matched no nodes in the downloaded page.</exception>
/// <remarks>
/// Any exception is written to the console and rethrown. Items whose publish
/// time cannot be parsed are logged and skipped instead of aborting the run.
/// </remarks>
static void Work(BlogSource source)
{
    try
    {
        Sw.Reset();
        Sw.Start();

        // Number of scraped items that were already present in the previous run.
        int repeatCount = 0;
        string html = HttpUtil.GetString(source.BlogDataUrl);
        List<Blog> blogs = new List<Blog>();
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(html);

        // All article item nodes. SelectNodes can yield null when the XPath
        // matches nothing (HtmlAgilityPack behavior), so fail with a clear
        // message instead of a NullReferenceException in the loop.
        var itemBodys = doc.DocumentNode.SelectNodes(source.DicXPath["item"]);
        if (itemBodys == null)
        {
            throw new InvalidOperationException(
                $"No article items matched XPath \"{source.DicXPath["item"]}\" for {source.Name} ({source.BlogDataUrl})");
        }

        foreach (var itemBody in itemBodys)
        {
            // Title element, title text and article URL.
            var titleElem = itemBody.SelectSingleNode(source.DicXPath["title"]);
            var title = titleElem?.InnerText;
            var url = titleElem?.Attributes["href"]?.Value;

            // Summary text with CRLF removed and surrounding whitespace trimmed.
            var summaryElem = itemBody.SelectSingleNode(source.DicXPath["summary"]);
            var summary = summaryElem?.InnerText.Replace("\r\n", "").Trim();

            // Footer element: holds the author and the publish time.
            var footElem = itemBody.SelectSingleNode(source.DicXPath["foot"]);
            var author = footElem?.SelectSingleNode(source.DicXPath["author"])?.InnerText;

            // Publish time: a dedicated node for the "WoshiPm" source, otherwise
            // extracted from the footer text. Regex.Match rejects a null input,
            // so a missing footer falls back to an empty string.
            var publishTimeText = (source.Path == "WoshiPm")
                ? footElem?.SelectSingleNode(source.DicXPath["date"])?.InnerText
                : Regex.Match(footElem?.InnerText ?? string.Empty, "\\d+-\\d+-\\d+ \\d+:\\d+").Value;

            // One malformed item must not abort the whole scrape.
            DateTime publishTime;
            if (!DateTime.TryParse(publishTimeText, out publishTime))
            {
                _logger.Error($"Skip {source.Name} item: cannot parse publish time \"{publishTimeText}\", url: {url}");
                continue;
            }

            // Assemble the blog object.
            Blog blog = new Blog()
            {
                Title = title,
                Url = url,
                Summary = summary,
                Author = author,
                PublishTime = publishTime
            };
            blogs.Add(blog);
        }

        string blogFileName = $"{source.FileName}-{DateTime.Now:yyyy-MM-dd}.txt";
        string blogFilePath = Path.Combine(_baseDataPath, source.Path, blogFileName);

        // URLs from the previous run, for constant-time duplicate checks.
        var previousUrls = new HashSet<string>(source.PreviousBlogs.Select(b => b.Url));

        // The using blocks flush and close the file even if a write throws.
        using (FileStream fs = new FileStream(blogFilePath, FileMode.Append, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs, Encoding.UTF8))
        {
            // De-duplicate against the previous run; only new items are written.
            foreach (var blog in blogs)
            {
                if (previousUrls.Contains(blog.Url))
                {
                    repeatCount++;
                }
                else
                {
                    sw.WriteLine($"标题:{blog.Title}");
                    sw.WriteLine($"网址:{blog.Url}");
                    sw.WriteLine($"摘要:{blog.Summary}");
                    sw.WriteLine($"作者:{blog.Author}");
                    sw.WriteLine($"发布时间:{blog.PublishTime:yyyy-MM-dd HH:mm}");
                    sw.WriteLine("--------------华丽的分割线---------------");
                }
            }
        }

        // Replace the previous run's records with this run's records.
        source.PreviousBlogs.Clear();
        source.PreviousBlogs.AddRange(blogs);
        Sw.Stop();

        // Statistics.
        _logger.Info($"Get {source.Name} data success,Time:{Sw.ElapsedMilliseconds}ms,Data Count:{blogs.Count},Repeat:{repeatCount},Effective:{blogs.Count - repeatCount}");

        // Send the mail once 24 hours have elapsed since the record time,
        // then move the record time forward by one day.
        if ((DateTime.Now - source.RecordTime).TotalHours >= 24)
        {
            _sendLogger.Info($"准备发送{source.Name}聚合邮件,记录时间:{source.RecordTime:yyyy-MM-dd HH:mm:ss}");
            SendMail(source);
            source.RecordTime = source.RecordTime.AddDays(1);
            _sendLogger.Info($"{source.Name}记录时间已更新:{source.RecordTime:yyyy-MM-dd HH:mm:ss}");
        }

        SaveData();
    }
    catch (Exception ex)
    {
        System.Console.WriteLine(ex.Message);
        throw;
    }
    finally
    {
        Sw.Stop();
    }
}