/// <summary>
/// Builds a <see cref="ModelReleaseInfo"/> record for one crawled page: strips markup from
/// the supplied texts, tags the record with every keyword group from <paramref name="dtkey"/>
/// whose words all occur in the page, and fills in the bookkeeping fields.
/// </summary>
/// <param name="html">Text stored (after <c>HtmlUtil.NoHTML</c>) as the title and searched first for keywords.</param>
/// <param name="url">Stored unchanged as <c>InfoSource</c>.</param>
/// <param name="dtkey">
/// Keyword table. Column 4 holds space-separated words that must all be present, and
/// columns 1 and 6 form the emitted "name-id" tag (column 6 must parse as an integer).
/// A null table is treated as having no rows.
/// </param>
/// <param name="sheng">Stored as <c>Sheng</c>; null becomes an empty string. Presumably the province.</param>
/// <param name="shi">Stored as <c>Shi</c>; null becomes an empty string. Presumably the city.</param>
/// <param name="xian">Stored as <c>Xian</c>; null becomes an empty string. Presumably the county.</param>
/// <param name="webName">Stored as <c>WebName</c>; null becomes an empty string.</param>
/// <param name="webInfo">Page content; stored (after <c>HtmlUtil.NoHTML</c>) as <c>Contexts</c> and used as the keyword fallback.</param>
/// <param name="pid">Stored unchanged as <c>Pid</c>.</param>
/// <returns>
/// The populated record. Exceptions are logged rather than thrown, so after a failure the
/// record may be only partially filled. <c>KeyWords</c> is left at its default value when
/// no keyword group matches.
/// </returns>
public static ModelReleaseInfo CrawlHtmlSource(string html, string url, DataTable dtkey, string sheng, string shi, string xian, string webName, string webInfo, int pid)
{
    // Create the data object.
    ModelReleaseInfo newsInfo = new ModelReleaseInfo();
    try
    {
        newsInfo.Title = HtmlUtil.NoHTML(html);

        // First pass: look for keyword groups in the html argument as given.
        List<string> matchedTags = CollectMatchedKeywords(dtkey, html);

        newsInfo.Contexts = HtmlUtil.NoHTML(webInfo);

        // Link of the site the record came from.
        newsInfo.InfoSource = url;

        // Keyword fallback: nothing matched in html, so try the stripped page content.
        if (matchedTags.Count == 0)
        {
            matchedTags = CollectMatchedKeywords(dtkey, newsInfo.Contexts);
        }

        // Only touch KeyWords when there is something to store; joining the tags directly
        // avoids the old "prepend a comma, then Substring(1)" step that threw on an empty string.
        if (matchedTags.Count > 0)
        {
            newsInfo.KeyWords = string.Join(",", matchedTags.ToArray());
        }

        // Collection date; invariant culture keeps the timestamp format machine-independent.
        newsInfo.CollectDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);

        // Publisher and publish date cannot be extracted yet, so they are left empty.
        newsInfo.ReleaseDate = "";
        newsInfo.ReleaseName = "";

        // Page snapshot is generated on user request elsewhere; empty by default.
        newsInfo.Snapshot = "";

        newsInfo.Sheng = sheng ?? "";
        newsInfo.Shi = shi ?? "";
        newsInfo.Xian = xian ?? "";

        // Site name.
        newsInfo.WebName = webName ?? "";

        newsInfo.Pid = pid;

        // Positive/negative classification of the content (per the original author's note).
        newsInfo.Part = GetParts(newsInfo.Contexts);

        newsInfo.Reposts = 0;
        newsInfo.Comments = 0;
    }
    catch (Exception ex)
    {
        // Log type, message and stack trace; the stack trace alone hides what went wrong.
        Comm.WriteErrorLog(ex.ToString());
    }

    return newsInfo;
}

/// <summary>
/// Returns one "name-id" tag per distinct keyword group in <paramref name="dtkey"/> whose
/// space-separated words (column 4) all occur, case-insensitively, in <paramref name="text"/>.
/// Tags are returned in table order.
/// </summary>
/// <param name="dtkey">Keyword table; null yields an empty result.</param>
/// <param name="text">Text to search; null is treated as empty.</param>
private static List<string> CollectMatchedKeywords(DataTable dtkey, string text)
{
    List<string> tags = new List<string>();
    if (dtkey == null)
    {
        return tags;
    }

    // Lower-case the haystack once instead of once per keyword.
    string haystack = (text ?? "").ToLower();

    // Groups already emitted, identified by the raw column 1 + column 6 pair.
    Dictionary<string, bool> emitted = new Dictionary<string, bool>();

    foreach (DataRow row in dtkey.Rows)
    {
        string name = row[1].ToString();
        string idText = row[6].ToString();
        string groupKey = name + "-" + idText;
        if (emitted.ContainsKey(groupKey))
        {
            continue;
        }

        // Name and id are kept apart (not re-split from the key), so a '-' in the name
        // or a negative id can no longer corrupt the tag. Rows whose id is not an
        // integer are skipped instead of aborting the whole record.
        int id;
        if (!int.TryParse(idText, out id))
        {
            continue;
        }

        bool allPresent = true;
        foreach (string word in row[4].ToString().Split(' '))
        {
            if (!haystack.Contains(word.ToLower()))
            {
                allPresent = false;
                break;
            }
        }

        // A group that failed on this row may still match on a later row with the same
        // key, so only successful groups are recorded as emitted.
        if (allPresent)
        {
            emitted.Add(groupKey, true);
            tags.Add(name + "-" + id);
        }
    }

    return tags;
}