/// <summary>
/// Runs two parallel consumers that drain <c>Queue</c> and write each dequeued batch of
/// release records to the database. <c>Parallel.Invoke</c> blocks the caller until both
/// consumers have seen <c>Program.ProClose</c> and returned.
/// </summary>
public void StartWrite()
{
    Action consumer = ConsumeQueue;
    Parallel.Invoke(consumer, consumer);
}

/// <summary>
/// Consumer loop: dequeues batches until <c>Program.ProClose</c> is set, sleeping one second
/// whenever the queue is empty.
/// </summary>
private void ConsumeQueue()
{
    List<ModelReleaseInfo> data;
    while (!Program.ProClose)
    {
        if (!Queue.TryDequeue(out data))
        {
            // Nothing to write yet; back off before polling again.
            System.Threading.Thread.Sleep(1000);
            continue;
        }

        WriteBatch(data);
    }
}

/// <summary>
/// Writes one batch to the database as a single multi-statement command. Records that fail
/// the exact keyword match, or for which <c>GetReleaseInfoCount</c> reports an existing row,
/// are skipped. Any exception is logged and the batch is abandoned; it is not rethrown so the
/// consumer loop keeps running.
/// </summary>
/// <param name="data">The batch of records taken from the queue.</param>
private void WriteBatch(List<ModelReleaseInfo> data)
{
    try
    {
        StringBuilder sb = new StringBuilder();
        TbReleaseInfo tri = new TbReleaseInfo();
        MySqlCmd cmd = new MySqlCmd();

        foreach (var mri in data)
        {
            // 2016.11.16: exact-match check. Drop the record when neither the title nor the
            // content contains every keyword.
            if (!MatchesAllKeywords(mri.KeyWords, mri.Title, mri.Contexts))
            {
                continue;
            }

            // Skip records already stored for this source/keyword pair.
            if (tri.GetReleaseInfoCount(mri.InfoSource, mri.KeyWords) > 0)
            {
                continue;
            }

            // NOTE(review): GetInsertStr returns raw SQL text that is concatenated below;
            // confirm that it escapes or parameterizes the crawled values.
            string sql = tri.GetInsertStr(mri);
            if (string.IsNullOrWhiteSpace(sql))
            {
                continue;
            }

            // Terminate the statement exactly once. The previous "sql += sql + ';'" appended
            // the statement to itself, so every such record was inserted twice.
            if (!sql.Trim().EndsWith(";", StringComparison.Ordinal))
            {
                sql += ";";
            }

            sb.Append(sql);
        }

        if (sb.Length > 0)
        {
            // Execute all collected INSERT statements in one round trip.
            cmd.ExecuteNonQuery(sb.ToString());
        }

        // Logged only when the batch completed without an exception.
        log.Info("数据层写入数据库成功");
    }
    catch (Exception ex)
    {
        Comm.WriteErrorLog(ex.Message);
        Comm.WriteErrorLog(ex.StackTrace);
    }
}

/// <summary>
/// Exact keyword match: returns true when <paramref name="keywords"/> is null or empty, or
/// when either the title or the content contains every space-separated keyword.
/// </summary>
/// <param name="keywords">Space-separated keywords; null or empty disables the check.</param>
/// <param name="title">Record title; null is treated as empty text.</param>
/// <param name="context">Record content; null is treated as empty text.</param>
private static bool MatchesAllKeywords(string keywords, string title, string context)
{
    if (string.IsNullOrEmpty(keywords))
    {
        return true;
    }

    string[] keys = keywords.Split(' ');
    return ContainsEveryKey(title, keys) || ContainsEveryKey(context, keys);
}

/// <summary>
/// Returns true when <paramref name="text"/> contains every entry of <paramref name="keys"/>
/// using an ordinal (culture-independent) search.
/// </summary>
private static bool ContainsEveryKey(string text, string[] keys)
{
    string haystack = text ?? string.Empty;
    foreach (string key in keys)
    {
        if (haystack.IndexOf(key, StringComparison.Ordinal) < 0)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds a <see cref="ModelReleaseInfo"/> for one crawled item: sets the title and content
/// from the supplied text, works out which keyword-table events match, and fills in the
/// remaining fields with the supplied values or fixed defaults.
/// </summary>
/// <param name="html">Text that becomes the title (via <c>HtmlUtil.NoHTML</c>) and is searched first for keyword matches.</param>
/// <param name="url">Stored as <c>InfoSource</c>.</param>
/// <param name="dtkey">
/// Keyword table. Columns 1 and 6 together identify an event (presumably event name and
/// numeric id; verify against the caller); column 4 holds the space-separated keywords that
/// must all be present for the event to match.
/// </param>
/// <param name="sheng">Stored as <c>Sheng</c>; null becomes an empty string.</param>
/// <param name="shi">Stored as <c>Shi</c>; null becomes an empty string.</param>
/// <param name="xian">Stored as <c>Xian</c>; null becomes an empty string.</param>
/// <param name="webName">Stored as <c>WebName</c>; null becomes an empty string.</param>
/// <param name="webInfo">Text that becomes <c>Contexts</c> (via <c>HtmlUtil.NoHTML</c>); searched for keywords only when <paramref name="html"/> matched none.</param>
/// <param name="pid">Stored as <c>Pid</c>.</param>
/// <returns>
/// The populated record. If an exception occurs part-way through, it is logged and the
/// partially populated record is returned.
/// </returns>
public static ModelReleaseInfo CrawlHtmlSource(string html, string url, DataTable dtkey, string sheng, string shi, string xian, string webName, string webInfo, int pid)
{
    // Matched events, keyed by "<col1>-<col6>"; the value keeps the two parts separately so
    // they never have to be recovered by splitting the key on '-'.
    Dictionary<string, KeyValuePair<string, string>> events = new Dictionary<string, KeyValuePair<string, string>>();

    // Create the data object.
    ModelReleaseInfo newsInfo = new ModelReleaseInfo();
    try
    {
        newsInfo.Title = HtmlUtil.NoHTML(html);

        // First pass: match keywords against the title text.
        CollectMatchingEvents(dtkey, html, events);
        AppendEventKeywords(newsInfo, events);

        newsInfo.Contexts = HtmlUtil.NoHTML(webInfo);

        // Site link.
        newsInfo.InfoSource = url;

        // Keyword fallback: nothing matched the title text, so try the content instead.
        if (string.IsNullOrEmpty(newsInfo.KeyWords))
        {
            CollectMatchingEvents(dtkey, newsInfo.Contexts, events);
            AppendEventKeywords(newsInfo, events);
        }

        // Strip the leading ',' added by AppendEventKeywords. Checking for empty as well as
        // null avoids an ArgumentOutOfRangeException that previously aborted every
        // assignment below when KeyWords was "".
        if (!string.IsNullOrEmpty(newsInfo.KeyWords))
        {
            newsInfo.KeyWords = newsInfo.KeyWords.Substring(1);
        }

        // Collection date; invariant culture keeps the separators fixed for storage.
        newsInfo.CollectDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);

        // Publisher and publish date are not available yet; set them to empty.
        newsInfo.ReleaseDate = "";
        newsInfo.ReleaseName = "";

        // Page snapshot: generated only on user request, otherwise empty.
        newsInfo.Snapshot = "";

        newsInfo.Sheng = sheng ?? "";
        newsInfo.Shi = shi ?? "";
        newsInfo.Xian = xian ?? "";

        // Site name.
        newsInfo.WebName = webName ?? "";

        newsInfo.Pid = pid;

        // Positive/negative classification of the content.
        newsInfo.Part = GetParts(newsInfo.Contexts);

        newsInfo.Reposts = 0;
        newsInfo.Comments = 0;
    }
    catch (Exception ex)
    {
        // Log the message as well as the stack trace, matching StartWrite.
        Comm.WriteErrorLog(ex.Message);
        Comm.WriteErrorLog(ex.StackTrace);
    }

    return newsInfo;
}

/// <summary>
/// Adds to <paramref name="events"/> every keyword-table row whose space-separated keywords
/// (column 4) all occur in <paramref name="text"/>, compared after lower-casing both sides.
/// Rows whose event key is already present are skipped. Null text is treated as empty.
/// </summary>
private static void CollectMatchingEvents(DataTable dtkey, string text, Dictionary<string, KeyValuePair<string, string>> events)
{
    // Lower-case the searched text once instead of once per keyword.
    string haystack = (text ?? string.Empty).ToLower();

    for (int j = 0; j < dtkey.Rows.Count; j++)
    {
        DataRow row = dtkey.Rows[j];
        string[] keys = row[4].ToString().Split(new char[] { ' ' });
        string name = row[1].ToString();
        string id = row[6].ToString();
        string eventKey = name + "-" + id;

        if (events.ContainsKey(eventKey))
        {
            continue;
        }

        bool allPresent = true;
        foreach (string k in keys)
        {
            if (!haystack.Contains(k.ToLower()))
            {
                allPresent = false;
                break;
            }
        }

        if (allPresent)
        {
            events.Add(eventKey, new KeyValuePair<string, string>(name, id));
        }
    }
}

/// <summary>
/// Appends ",&lt;name&gt;-&lt;id&gt;" to <c>newsInfo.KeyWords</c> for every collected event. The id
/// is passed through <c>int.Parse</c>, so a non-numeric id throws <see cref="FormatException"/>.
/// </summary>
private static void AppendEventKeywords(ModelReleaseInfo newsInfo, Dictionary<string, KeyValuePair<string, string>> events)
{
    foreach (KeyValuePair<string, string> ev in events.Values)
    {
        newsInfo.KeyWords += "," + ev.Key + "-" + int.Parse(ev.Value);
    }
}