/// <summary>
/// Handles the OK button click: validates the text in <c>txt_NewExpression</c>,
/// checks via <c>RegularDAL.GetRegular</c> that it is not already present, and
/// hands a new <c>Regular</c> (with the currently selected site and type) to
/// <c>RegularDAL.AddRegular</c>. The outcome is reported with a message box.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btn_OK_Click(object sender, EventArgs e)
{
    // Trim once up front so that validation, the duplicate lookup and the stored
    // name all use the same value. Validating the trimmed text also rejects
    // whitespace-only input, which previously passed the emptiness check and was
    // then saved with an empty name.
    string expression = (txt_NewExpression.Text ?? String.Empty).Trim();
    if (String.IsNullOrEmpty(expression))
    {
        MessageBox.Show("正则表达式不能为空!");
        return;
    }

    using (RegularDAL dal = new RegularDAL())
    {
        // A non-null result means an entry with this name already exists.
        if (dal.GetRegular(expression) != null)
        {
            MessageBox.Show("该正则已存在!");
            return;
        }

        // NOTE(review): "as" yields null when nothing (or an item of another
        // type) is selected in the combo boxes — confirm AddRegular tolerates
        // a null Site/Type.
        Regular regular = new Regular()
        {
            ID = Guid.NewGuid(),
            Name = expression,
            Site = comb_Site.SelectedItem as Site,
            Type = comb_TypeList.SelectedItem as Entities.Type
        };

        if (dal.AddRegular(regular))
        {
            // Clear the input and refresh the list for the selected site.
            txt_NewExpression.Text = "";
            BindRegularList(comb_Site.SelectedItem as Site);
            MessageBox.Show("添加成功!");
        }
        else
        {
            MessageBox.Show("添加失败!");
        }
    }
}
/// <summary>
/// Collects the link targets (<c>href</c> values of <c>&lt;a&gt;</c> tags) found
/// inside the <c>&lt;div class="..."&gt;</c> blocks of <paramref name="html"/>
/// whose class attribute matches <c>regular.Name</c>.
/// </summary>
/// <param name="regular">
/// Supplies the class pattern (<c>Name</c>) and the <c>Site</c> and <c>Type</c>
/// values copied onto every returned address.
/// </param>
/// <param name="html">HTML text to scan. Null or empty input yields an empty list.</param>
/// <returns>One <c>Model.Address</c> per anchor found, in match order; never null.</returns>
public List<Model.Address> GetArticleAddress(Regular regular, string html)
{
    List<Model.Address> addressList = new List<Model.Address>();

    // Nothing to scan; also avoids the ArgumentNullException Regex.Replace
    // throws for a null input.
    if (string.IsNullOrEmpty(html))
    {
        return addressList;
    }

    // Strip tabs and line breaks so that '.' in the patterns below can span what
    // were originally several lines.
    string clearFilter = @"[\t\n\r]";
    html = Regex.Replace(html, clearFilter, "");

    // Look for the div with a double-quoted class attribute first and fall back
    // to the single-quoted form only when that finds nothing.
    // The lazy ".*?</div>" stops at the first closing tag, so a block containing
    // nested divs is cut off at the first inner "</div>".
    // NOTE(review): regular.Name is inserted into the pattern unescaped, so any
    // regex metacharacters in it are interpreted as pattern syntax — presumably
    // intentional, confirm before wrapping it in Regex.Escape.
    string newsFilter = "<div class=\"" + regular.Name + "\">.*?</div>";
    MatchCollection newsMatch = Regex.Matches(html, newsFilter);
    if (newsMatch.Count <= 0)
    {
        newsFilter = "<div class='" + regular.Name + "'>.*?</div>";
        newsMatch = Regex.Matches(html, newsFilter);
    }

    // Join all matched blocks into one string. A StringBuilder keeps this linear
    // in the total length instead of re-copying the accumulated text per match.
    System.Text.StringBuilder mainNewsBuilder = new System.Text.StringBuilder();
    foreach (Match block in newsMatch)
    {
        mainNewsBuilder.Append(block.Value);
    }
    string mainNews = mainNewsBuilder.ToString();

    // Pattern for extracting links: captures the href value whether it is
    // single-quoted, double-quoted or unquoted.
    Regex linkRegex = new Regex(@"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>(?<text>.*?)</a>", RegexOptions.IgnoreCase);
    for (Match link = linkRegex.Match(mainNews); link.Success; link = link.NextMatch())
    {
        string url = link.Groups["href"].Value;
        addressList.Add(new Model.Address() { address = url, site = regular.Site, type = regular.Type });
    }

    return addressList;
}