/// <summary>
/// Crawls the four chongbuluo.com portal pages (scholar, data, image and quick search),
/// extracts every link listed in the page's <c>ul#foo.chongbuluo</c> element and stores
/// the links of each portal through <c>U_Url_ListBLL.Add</c>.
/// </summary>
/// <remarks>
/// Each portal is written as its own batch. A portal whose page does not contain the
/// expected list, and any list item that cannot be parsed, is reported on the console
/// and skipped instead of aborting the whole crawl.
/// </remarks>
public static void ReptitleChongbuluoUrl()
{
    // Category name (stored in Types) -> portal page to crawl.
    var portals = new Dictionary<string, string>
    {
        { "学术搜索", "http://scholar.chongbuluo.com/" },
        { "数据搜索", "http://data.chongbuluo.com/" },
        { "图片搜索", "http://image.chongbuluo.com/" },
        { "快搜索", "http://search.chongbuluo.com/" }
    };

    const string listOpenTag = " <ul id=\"foo\" class=\"chongbuluo\">";
    const string listCloseTag = "</ul>";

    var bll = new U_Url_ListBLL();

    foreach (var portal in portals)
    {
        var html = DownloadData.GetDownloadData(portal.Value);

        // Without the opening marker there is nothing to slice; IndexOf would return -1
        // and the Substring below would cut the page at an arbitrary position.
        int listStart = string.IsNullOrEmpty(html)
            ? -1
            : html.IndexOf(listOpenTag, StringComparison.Ordinal);
        if (listStart < 0)
        {
            Console.WriteLine("异常数据:" + portal.Value);
            continue;
        }

        html = html.Substring(listStart + listOpenTag.Length);

        // A missing closing tag keeps the remainder of the page rather than throwing.
        int listEnd = html.IndexOf(listCloseTag, StringComparison.Ordinal);
        if (listEnd >= 0)
        {
            html = html.Substring(0, listEnd);
        }

        // A fresh batch per portal: reusing one list across portals re-submitted every
        // previously written row (same Id) on each subsequent Add call.
        var batch = new List<U_Url_List>();

        foreach (var item in Regex.Split(html, "</li>"))
        {
            if (!item.Trim().StartsWith("<li ", StringComparison.Ordinal))
            {
                continue;
            }

            var entity = ParseChongbuluoItem(item);
            if (entity == null)
            {
                Console.WriteLine("异常数据:" + item);
                continue;
            }

            entity.Id = System.Guid.NewGuid().ToString("N");
            entity.Source = portal.Value + " 爬取";
            entity.Create_Time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
            entity.Create_Id = "pc";
            entity.Status = 1;
            entity.Types = portal.Key;

            batch.Add(entity);
            Console.WriteLine(string.Format("{0} {1} {2}写入成功", portal.Key, entity.Name, entity.Url));
        }

        bll.Add(batch);
        Console.WriteLine("虫虫部落抓取写入完成……");
    }
}

/// <summary>
/// Parses one <c>&lt;li ...&gt;</c> fragment of a chongbuluo list into an entity carrying
/// IconImg, Url and Name. Returns <c>null</c> when the fragment does not have the
/// expected number of tags or quoted attribute values.
/// </summary>
private static U_Url_List ParseChongbuluoItem(string itemHtml)
{
    // The fragment is addressed by the position of each '>'-terminated piece.
    var parts = itemHtml.Split('>');

    // Items carrying a nested "more" menu keep their link five pieces further along.
    bool hasMoreMenu = itemHtml.Contains("<ul class=\"more\">");
    int urlIndex = hasMoreMenu ? 7 : 2;
    int nameIndex = urlIndex + 1;

    if (parts.Length <= nameIndex)
    {
        return null;
    }

    string icon = ChongbuluoQuotedValue(parts[1]);
    string link = ChongbuluoQuotedValue(parts[urlIndex]);
    if (icon == null || link == null)
    {
        return null;
    }

    var entity = new U_Url_List();
    entity.IconImg = icon;
    entity.Url = link;
    // The name is the text that precedes the next opening tag.
    entity.Name = parts[nameIndex].Split('<')[0];
    return entity;
}

/// <summary>
/// Returns the text between the first pair of double quotes in <paramref name="fragment"/>,
/// or <c>null</c> when the fragment contains no double quote.
/// </summary>
private static string ChongbuluoQuotedValue(string fragment)
{
    var pieces = fragment.Split('"');
    return pieces.Length > 1 ? pieces[1] : null;
}
/// <summary>
/// Crawls the site directory at http://www.h-ui.net/site.shtml. Every
/// <c>dl.sitelist_1</c> section is treated as a category: its <c>dt</c> text becomes the
/// category title (stored in Types) and each <c>&lt;li&gt;</c> of its <c>ul.cl</c> list
/// becomes one link. The links of each section are stored through
/// <c>U_Url_ListBLL.Add</c>.
/// </summary>
/// <remarks>
/// Entries from which no valid URL can be extracted are reported on the console as
/// abnormal data and are not stored.
/// </remarks>
public static void ReptitleH_UIUrl()
{
    const string pageUrl = "http://www.h-ui.net/site.shtml";
    const string listOpenTag = "<ul class=\"cl\">";

    var html = DownloadData.GetDownloadData(pageUrl);
    if (string.IsNullOrEmpty(html))
    {
        Console.WriteLine("异常数据:" + pageUrl);
        return;
    }

    // Narrow the page down to the directory body; a missing marker leaves the text untouched.
    int bodyStart = html.IndexOf("<div class=\"bk_gray mt-10\">", StringComparison.Ordinal);
    if (bodyStart >= 0)
    {
        html = html.Substring(bodyStart);
    }

    int bodyEnd = html.IndexOf("</article>", StringComparison.Ordinal);
    if (bodyEnd >= 0)
    {
        html = html.Substring(0, bodyEnd);
    }

    var sections = Regex.Split(html, "<dl class=\"sitelist_1 cl\">")
        .Where(s => s.Trim().StartsWith("<dt class", StringComparison.Ordinal));

    foreach (var section in sections)
    {
        // Title = text between the end of the first tag and the start of the next one.
        string title = section;
        int firstTagEnd = title.IndexOf('>');
        if (firstTagEnd >= 0)
        {
            title = title.Substring(firstTagEnd + 1);
        }

        int nextTagStart = title.IndexOf('<');
        if (nextTagStart >= 0)
        {
            title = title.Substring(0, nextTagStart);
        }

        // No link list in this section: fall back to the title text (unchanged from the
        // previous behaviour), which is then normally reported as abnormal data below.
        string listHtml = title;
        int listStart = section.IndexOf(listOpenTag, StringComparison.Ordinal);
        if (listStart >= 0)
        {
            listHtml = section.Substring(listStart + listOpenTag.Length);
        }

        int listEnd = listHtml.IndexOf("</ul>", StringComparison.Ordinal);
        if (listEnd >= 0)
        {
            listHtml = listHtml.Substring(0, listEnd).Trim();
        }

        var bll = new U_Url_ListBLL();
        var batch = new List<U_Url_List>();

        foreach (var entry in Regex.Split(listHtml, "<li>"))
        {
            if (string.IsNullOrEmpty(entry))
            {
                continue;
            }

            string linkUrl;
            string linkName;
            if (!TryParseHUiEntry(entry, out linkUrl, out linkName))
            {
                // Previously such entries were either stored with a null Url and the
                // name left over from the preceding entry, or crashed on k[7]/k[8].
                Console.WriteLine("异常数据:" + entry);
                continue;
            }

            var entity = new U_Url_List();
            entity.Url = linkUrl;
            entity.Id = System.Guid.NewGuid().ToString("N");
            entity.Name = linkName;
            entity.Source = pageUrl + " 爬取";
            entity.Create_Time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
            entity.Create_Id = "pc";
            entity.Status = 1;
            entity.Types = title;

            batch.Add(entity);
            // Log the URL that was actually stored, not unconditionally the first candidate.
            Console.WriteLine(string.Format("{0} {1} {2}", title, linkUrl, linkName));
        }

        bll.Add(batch);
        Console.WriteLine("" + pageUrl + "落抓取写入完成……");
    }
}

/// <summary>
/// Extracts the link URL and display name from one <c>&lt;li&gt;</c> fragment of the
/// h-ui.net directory. The fragment is split on double quotes; the URL is expected at
/// piece 5, or at piece 7 when piece 5 is not a valid URL.
/// </summary>
/// <param name="entryHtml">The HTML that followed one <c>&lt;li&gt;</c> marker.</param>
/// <param name="linkUrl">Receives the URL accepted by <c>ReptitleDownload.VerifyURLIsValid</c>.</param>
/// <param name="linkName">Receives the display name that accompanies the URL.</param>
/// <returns><c>true</c> when a valid URL and its name were found; otherwise <c>false</c>.</returns>
private static bool TryParseHUiEntry(string entryHtml, out string linkUrl, out string linkName)
{
    linkUrl = null;
    linkName = null;

    var parts = entryHtml.Split('"');
    if (parts.Length <= 6)
    {
        return false;
    }

    if (ReptitleDownload.VerifyURLIsValid(parts[5]))
    {
        if (parts[6].Contains("</a>"))
        {
            linkName = HUiAnchorText(parts[6]);
        }
        else if (parts.Length > 7)
        {
            // No closing anchor right after the URL: the name is the next quoted value.
            linkName = parts[7];
        }
        else
        {
            return false;
        }

        linkUrl = parts[5];
        return true;
    }

    // Pieces 7 and 8 only exist when the fragment holds at least eight double quotes.
    if (parts.Length > 8 && ReptitleDownload.VerifyURLIsValid(parts[7]))
    {
        linkName = parts[8].Contains("</a>") ? HUiAnchorText(parts[8]) : parts[8];
        linkUrl = parts[7];
        return true;
    }

    return false;
}

/// <summary>
/// Returns the text that precedes <c>&lt;/a&gt;</c> in <paramref name="fragment"/> with its
/// first character (the <c>&gt;</c> closing the opening tag) removed. The caller must
/// ensure the fragment contains <c>&lt;/a&gt;</c>.
/// </summary>
private static string HUiAnchorText(string fragment)
{
    string text = fragment.Substring(0, fragment.IndexOf("</a>", StringComparison.Ordinal));
    // Guard the empty case: "".Substring(1) throws ArgumentOutOfRangeException.
    return text.Length > 0 ? text.Substring(1) : text;
}