/// <summary>
/// Validates a submitted site and, when every check passes, stores it in the URL list.
/// </summary>
/// <param name="p">
/// Site entity bound from the request body. <c>Url</c> and <c>Name</c> are required,
/// <c>Types</c> must match an existing type ID, and <c>IconImg</c> is optional but must be
/// a URL when supplied.
/// </param>
/// <returns>
/// A response whose body is the JSON-serialized status message: the success text, or the
/// text of the first validation rule that failed.
/// </returns>
public HttpResponseMessage AddUrl([FromBody] U_Url_List p)
{
    U_Url_ListBLL bll_UrlList = new U_Url_ListBLL();
    U_Url_TypeBLL bllType = new U_Url_TypeBLL();
    string returnInfo;

    try
    {
        if (p == null || string.IsNullOrEmpty(p.Url) || string.IsNullOrEmpty(p.Name))
        {
            // A missing body is reported the same way as missing required fields.
            returnInfo = "网址地址或名称为空,请检查";
        }
        else if (!p.Url.IsUrlFormat())
        {
            returnInfo = "网址地址,错误,请检查";
        }
        else
        {
            // Find() receives a raw WHERE fragment, so request values are spliced into SQL text.
            // Doubling single quotes keeps them inside the string literal.
            // NOTE(review): switch to a parameterized lookup if the BLL exposes one.
            string safeTypes = (p.Types ?? string.Empty).Replace("'", "''");
            string safeUrl = p.Url.Replace("'", "''");

            if (bllType.Find(string.Format("AND ID='{0}'", safeTypes)) == null)
            {
                returnInfo = "添加的类型不存,请确认";
            }
            else if (bll_UrlList.Find(string.Format("AND Url='{0}'", safeUrl)) != null)
            {
                returnInfo = "网址已存在,请确认";
            }
            else if (!string.IsNullOrEmpty(p.IconImg) && !p.IconImg.IsUrlFormat())
            {
                returnInfo = "网址LOG错误,不是Url地址,请检查";
            }
            else
            {
                p.Id = GetStr.GetGuid;
                bll_UrlList.Add(new List<U_Url_List> { p });
                returnInfo = "添加成功";
            }
        }
    }
    catch (System.Exception ex)
    {
        // The caller still gets the generic message, but the failure is no longer lost.
        System.Diagnostics.Trace.TraceError("AddUrl failed: {0}", ex);
        returnInfo = "发生错误";
    }

    return JsonConvert.SerializeObject(returnInfo).ToHttpResponseMessage();
}
/// <summary>
/// Crawls the four chongbuluo.com search pages, extracts every site listed inside the
/// <c>ul#foo.chongbuluo</c> element and writes the entries through <c>U_Url_ListBLL.Add</c>.
/// </summary>
/// <remarks>
/// Each source page is stored as its own batch. Pages or list items that do not have the
/// expected markup are reported on the console and skipped instead of aborting the run.
/// </remarks>
public static void ReptitleChongbuluoUrl()
{
    // Category name -> page to crawl.
    Dictionary<string, string> dict = new Dictionary<string, string>();
    dict.Add("学术搜索", "http://scholar.chongbuluo.com/");
    dict.Add("数据搜索", "http://data.chongbuluo.com/");
    dict.Add("图片搜索", "http://image.chongbuluo.com/");
    dict.Add("快搜索", "http://search.chongbuluo.com/");

    const string listOpen = " <ul id=\"foo\" class=\"chongbuluo\">";
    const string listClose = "</ul>";
    const string moreMarker = "<ul class=\"more\">";

    var bll = new U_Url_ListBLL();

    foreach (var d in dict)
    {
        var html = DownloadData.GetDownloadData(d.Value);

        // Cut the page down to the content of the site list; skip the source when the markers are missing.
        int openAt = html.IndexOf(listOpen, StringComparison.Ordinal);
        if (openAt < 0)
        {
            Console.WriteLine("异常数据:" + d.Value);
            continue;
        }
        html = html.Substring(openAt + listOpen.Length);

        int closeAt = html.IndexOf(listClose, StringComparison.Ordinal);
        if (closeAt < 0)
        {
            Console.WriteLine("异常数据:" + d.Value);
            continue;
        }
        html = html.Substring(0, closeAt);

        // A fresh batch per source: reusing one list across sources would re-insert
        // the earlier sources' rows on every later Add call.
        List<U_Url_List> listEntity = new List<U_Url_List>();

        foreach (var item in Regex.Split(html, "</li>"))
        {
            var a = item.Trim();
            if (!a.StartsWith("<li "))
            {
                continue;
            }

            var x = Regex.Split(item, ">");
            if (x.Length < 4)
            {
                continue;
            }

            // Items that contain a nested "more" list carry the link five tags further in.
            int urlIndex = a.Contains(moreMarker) ? 7 : 2;
            int nameIndex = urlIndex + 1;
            if (x.Length <= nameIndex)
            {
                Console.WriteLine("异常数据:" + a);
                continue;
            }

            // The wanted attribute value is the first quoted string of each tag fragment.
            var iconParts = Regex.Split(x[1], "\"");
            var urlParts = Regex.Split(x[urlIndex], "\"");
            if (iconParts.Length < 2 || urlParts.Length < 2)
            {
                Console.WriteLine("异常数据:" + a);
                continue;
            }

            U_Url_List entity = new U_Url_List();
            entity.Id = System.Guid.NewGuid().ToString("N");
            entity.IconImg = iconParts[1];
            entity.Url = urlParts[1];
            entity.Name = Regex.Split(x[nameIndex], "<")[0];
            entity.Source = d.Value + " 爬取";
            entity.Create_Time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
            entity.Create_Id = "pc";
            entity.Status = 1;
            entity.Types = d.Key;
            listEntity.Add(entity);
            Console.WriteLine(string.Format("{0} {1} {2}写入成功", d.Key, entity.Name, entity.Url));
        }

        if (listEntity.Count > 0)
        {
            bll.Add(listEntity);
        }
        Console.WriteLine("虫虫部落抓取写入完成……");
    }
}
/// <summary>
/// Validates a site submitted by a user and, when every check passes, stores it in the URL list.
/// </summary>
/// <param name="Url">Site address; required and must be a URL.</param>
/// <param name="Name">Display name; required. Passed through <c>Ext.FilterMark</c> before use.</param>
/// <param name="IconImg">Optional icon address; must be a URL when supplied.</param>
/// <param name="Title">Site title. Passed through <c>Ext.FilterMark</c> before use.</param>
/// <param name="Types">ID of an existing site type. Passed through <c>Ext.FilterMark</c> before use.</param>
/// <returns>
/// A JSON result carrying the status message: the success text, or the text of the first
/// validation rule that failed.
/// </returns>
public ActionResult AddUrl(string Url, string Name, string IconImg, string Title, string Types)
{
    string returnInfo;

    try
    {
        U_Url_List t = new U_Url_List();
        t.Url = Url;
        t.Name = Ext.FilterMark(Name);
        t.IconImg = IconImg;
        t.Title = Ext.FilterMark(Title);
        t.Types = Ext.FilterMark(Types);
        // NOTE(review): GetHashCode() presumably yields the enum's numeric value here - confirm the underlying type.
        t.Status = EnumUrlStatus.New.GetHashCode();
        t.Source = "用户添加";
        t.Create_Time = GetStr.GetCurrentDate;

        if (string.IsNullOrEmpty(t.Url) || string.IsNullOrEmpty(t.Name))
        {
            returnInfo = "网址地址或名称为空,请检查";
        }
        else if (!t.Url.IsUrlFormat())
        {
            returnInfo = "网址地址,错误,请检查";
        }
        else
        {
            // Find() receives a raw WHERE fragment, so request values are spliced into SQL text.
            // Doubling single quotes keeps them inside the string literal.
            // NOTE(review): switch to a parameterized lookup if the BLL exposes one.
            string safeTypes = (t.Types ?? string.Empty).Replace("'", "''");
            string safeUrl = t.Url.Replace("'", "''");

            if (bllType.Find(string.Format("AND ID='{0}'", safeTypes)) == null)
            {
                returnInfo = "添加的类型不存,请确认";
            }
            else if (bll_UrlList.Find(string.Format("AND Url='{0}'", safeUrl)) != null)
            {
                returnInfo = "网址已存在,请确认";
            }
            else if (!string.IsNullOrEmpty(t.IconImg) && !t.IconImg.IsUrlFormat())
            {
                returnInfo = "网址LOG错误,不是Url地址,请检查";
            }
            else
            {
                t.Id = GetStr.GetGuid;
                bll_UrlList.Add(new List<U_Url_List> { t });
                returnInfo = "添加成功";
            }
        }
    }
    catch (System.Exception ex)
    {
        // The caller still gets the generic message, but the failure is no longer lost.
        System.Diagnostics.Trace.TraceError("AddUrl failed: {0}", ex);
        returnInfo = "发生错误";
    }

    return this.Json(returnInfo, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Crawls the h-ui.net site directory page and writes each category's links through
/// <c>U_Url_ListBLL.Add</c>, one batch per category.
/// </summary>
/// <remarks>
/// The page is parsed with plain string splitting. An entry is stored only when a valid URL
/// and a non-empty link text were both found; everything else is reported on the console as
/// abnormal data and skipped.
/// </remarks>
public static void ReptitleH_UIUrl()
{
    string url = "http://www.h-ui.net/site.shtml";
    var page = DownloadData.GetDownloadData(url);

    // Narrow the page to the region between the directory container and the end of the article.
    int cut = page.IndexOf("<div class=\"bk_gray mt-10\">", StringComparison.Ordinal);
    if (cut >= 0)
    {
        page = page.Substring(cut);
    }
    cut = page.IndexOf("</article>", StringComparison.Ordinal);
    if (cut >= 0)
    {
        page = page.Substring(0, cut);
    }

    const string listOpen = "<ul class=\"cl\">";
    const string anchorClose = "</a>";
    var bll = new U_Url_ListBLL();

    var sections = Regex.Split(page, "<dl class=\"sitelist_1 cl\">")
        .Where(n => n.Trim().StartsWith("<dt class", StringComparison.Ordinal));

    foreach (var section in sections)
    {
        // Category title: the text between the first '>' and the next '<'.
        string title = section;
        cut = title.IndexOf(">", StringComparison.Ordinal);
        if (cut >= 0)
        {
            title = title.Substring(cut + 1);
        }
        cut = title.IndexOf("<", StringComparison.Ordinal);
        if (cut >= 0)
        {
            title = title.Substring(0, cut);
        }

        // Sections without the list marker keep the title text here; it normally has too few
        // quote-delimited parts and is reported as abnormal data below.
        string listHtml = title;
        cut = section.IndexOf(listOpen, StringComparison.Ordinal);
        if (cut >= 0)
        {
            listHtml = section.Substring(cut + listOpen.Length);
        }
        cut = listHtml.IndexOf("</ul>", StringComparison.Ordinal);
        if (cut >= 0)
        {
            listHtml = listHtml.Substring(0, cut).Trim();
        }

        List<U_Url_List> listEntity = new List<U_Url_List>();

        foreach (var key in Regex.Split(listHtml, "<li>"))
        {
            if (string.IsNullOrEmpty(key))
            {
                continue;
            }

            // Splitting on quotes puts attribute values at odd indexes; the link is expected
            // at index 5, or at index 7 when the tag carries one more attribute before it.
            var k = Regex.Split(key, "\"");
            string siteUrl = null;
            string siteName = null;

            if (k.Length > 6 && ReptitleDownload.VerifyURLIsValid(k[5]))
            {
                siteUrl = k[5];
                int end = k[6].IndexOf(anchorClose, StringComparison.Ordinal);
                if (end >= 0)
                {
                    // k[6] looks like ">text</a>...": keep the text and drop the leading '>'.
                    siteName = k[6].Substring(0, end);
                    siteName = siteName.Length > 0 ? siteName.Substring(1) : siteName;
                }
                else if (k.Length > 7)
                {
                    siteName = k[7];
                }
            }
            else if (k.Length > 8 && ReptitleDownload.VerifyURLIsValid(k[7]))
            {
                siteUrl = k[7];
                siteName = k[8];
                int end = siteName.IndexOf(anchorClose, StringComparison.Ordinal);
                if (end >= 0)
                {
                    siteName = siteName.Substring(0, end);
                    siteName = siteName.Length > 0 ? siteName.Substring(1) : siteName;
                }
            }

            if (siteUrl == null || string.IsNullOrEmpty(siteName))
            {
                // No usable link or name: never store a row with a missing URL or a leftover name.
                Console.WriteLine("异常数据:" + key);
                continue;
            }

            U_Url_List entity = new U_Url_List();
            entity.Url = siteUrl;
            entity.Id = System.Guid.NewGuid().ToString("N");
            entity.Name = siteName;
            entity.Source = url + " 爬取";
            entity.Create_Time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
            entity.Create_Id = "pc";
            entity.Status = 1;
            entity.Types = title;
            listEntity.Add(entity);
            Console.WriteLine(string.Format("{0} {1} {2}", title, siteUrl, siteName));
        }

        if (listEntity.Count > 0)
        {
            bll.Add(listEntity);
        }
        Console.WriteLine("" + url + "落抓取写入完成……");
    }
}