Пример #1
0
 /// <summary>
 /// Downloads the four chongbuluo.com search portal pages, extracts the link entries found in
 /// the list marked <c>id="foo" class="chongbuluo"</c> on each page, and passes the entries of
 /// each page to <c>U_Url_ListBLL.Add</c>.
 /// </summary>
 /// <remarks>
 /// A fresh entity list is built for every page, so entries from an earlier page are never
 /// handed to <c>Add</c> a second time. Pages or list items that do not have the expected
 /// markup are reported on the console and skipped instead of throwing.
 /// </remarks>
 public static void ReptitleChongbuluoUrl()
 {
     #region 数据搜索
     // Category name -> page to crawl.
     var sites = new Dictionary<string, string>
     {
         { "学术搜索", "http://scholar.chongbuluo.com/" },
         { "数据搜索", "http://data.chongbuluo.com/" },
         { "图片搜索", "http://image.chongbuluo.com/" },
         { "快搜索", "http://search.chongbuluo.com/" }
     };

     // The leading space in the opening marker is part of the text that is searched for.
     const string listOpen   = " <ul id=\"foo\" class=\"chongbuluo\">";
     const string listClose  = "</ul>";
     const string moreMarker = "<ul class=\"more\">";

     var bll = new U_Url_ListBLL();
     foreach (var site in sites)
     {
         var page = DownloadData.GetDownloadData(site.Value);

         // Locate the list body; the closing tag is searched only after the opening marker.
         var openAt  = string.IsNullOrEmpty(page) ? -1 : page.IndexOf(listOpen, StringComparison.Ordinal);
         var bodyAt  = openAt + listOpen.Length;
         var closeAt = openAt < 0 ? -1 : page.IndexOf(listClose, bodyAt, StringComparison.Ordinal);
         if (closeAt < 0)
         {
             Console.WriteLine("异常数据:" + site.Value);
             continue;
         }
         var listHtml = page.Substring(bodyAt, closeAt - bodyAt);

         // One list per page: only this page's entries are handed to Add below.
         var siteEntities = new List<U_Url_List>();
         foreach (var item in Regex.Split(listHtml, "</li>"))
         {
             var trimmed = item.Trim();
             if (!trimmed.StartsWith("<li ", StringComparison.Ordinal))
             {
                 continue;
             }

             // Items containing a nested "more" list carry the link five fragments later.
             var parts    = item.Split('>');
             var urlIndex = trimmed.Contains(moreMarker) ? 7 : 2;
             if (parts.Length < urlIndex + 2)
             {
                 continue;
             }

             var icon = FirstQuotedValueOrNull(parts[1]);
             var link = FirstQuotedValueOrNull(parts[urlIndex]);
             if (icon == null || link == null)
             {
                 Console.WriteLine("异常数据:" + trimmed);
                 continue;
             }

             var entity = new U_Url_List
             {
                 Id          = System.Guid.NewGuid().ToString("N"),
                 IconImg     = icon,
                 Url         = link,
                 // The display name is the text that follows the link's opening tag.
                 Name        = parts[urlIndex + 1].Split('<')[0],
                 Source      = site.Value + " 爬取",
                 Create_Time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                 Create_Id   = "pc",
                 Status      = 1,
                 Types       = site.Key
             };
             siteEntities.Add(entity);
             Console.WriteLine(string.Format("{0} {1} {2}写入成功", site.Key, entity.Name, entity.Url));
         }

         bll.Add(siteEntities);
         Console.WriteLine("虫虫部落抓取写入完成……");
     }
     #endregion
 }

 /// <summary>
 /// Returns the text between the first and second double quote of <paramref name="fragment"/>,
 /// or <c>null</c> when the fragment contains no double quote.
 /// </summary>
 private static string FirstQuotedValueOrNull(string fragment)
 {
     var pieces = fragment.Split('"');
     return pieces.Length > 1 ? pieces[1] : null;
 }
Пример #2
0
        /// <summary>
        /// Downloads the h-ui.net site directory page, splits it into its
        /// <c>dl.sitelist_1</c> sections and passes the links of each section to
        /// <c>U_Url_ListBLL.Add</c>, using the section heading as the entity type.
        /// </summary>
        /// <remarks>
        /// Entries from which no valid link can be extracted are reported on the console as
        /// abnormal data and skipped; they are never stored with a missing URL or with a name
        /// left over from a previous entry.
        /// </remarks>
        public static void ReptitleH_UIUrl()
        {
            string url  = "http://www.h-ui.net/site.shtml";
            var    page = DownloadData.GetDownloadData(url);
            if (string.IsNullOrEmpty(page))
            {
                Console.WriteLine("异常数据:" + url);
                return;
            }

            // Narrow the page down to the part between the directory container and </article>;
            // either boundary is simply left alone when its marker is missing.
            var from = page.IndexOf("<div class=\"bk_gray mt-10\">", StringComparison.Ordinal);
            if (from >= 0)
            {
                page = page.Substring(from);
            }
            var to = page.IndexOf("</article>", StringComparison.Ordinal);
            if (to >= 0)
            {
                page = page.Substring(0, to);
            }

            const string listOpen  = "<ul class=\"cl\">";
            const string listClose = "</ul>";

            var bll = new U_Url_ListBLL();
            foreach (var section in Regex.Split(page, "<dl class=\"sitelist_1 cl\">"))
            {
                if (!section.Trim().StartsWith("<dt class", StringComparison.Ordinal))
                {
                    continue;
                }

                // Section title: the text between the first '>' and the next '<'.
                var title   = section;
                var titleAt = title.IndexOf('>');
                if (titleAt >= 0)
                {
                    title = title.Substring(titleAt + 1);
                }
                var titleEnd = title.IndexOf('<');
                if (titleEnd >= 0)
                {
                    title = title.Substring(0, titleEnd);
                }

                // Without a link list there is nothing to store for this section.
                var listAt = section.IndexOf(listOpen, StringComparison.Ordinal);
                if (listAt < 0)
                {
                    Console.WriteLine("异常数据:" + title);
                    continue;
                }
                var listHtml = section.Substring(listAt + listOpen.Length);
                var listEnd  = listHtml.IndexOf(listClose, StringComparison.Ordinal);
                if (listEnd >= 0)
                {
                    listHtml = listHtml.Substring(0, listEnd).Trim();
                }

                var entities = new List<U_Url_List>();
                foreach (var entry in Regex.Split(listHtml, "<li>"))
                {
                    if (entry.Trim().Length == 0)
                    {
                        continue;
                    }

                    string link;
                    string name;
                    if (!TryReadH_UILink(entry.Split('"'), out link, out name))
                    {
                        Console.WriteLine("异常数据:" + entry);
                        continue;
                    }

                    entities.Add(new U_Url_List
                    {
                        Id          = System.Guid.NewGuid().ToString("N"),
                        Url         = link,
                        Name        = name,
                        Source      = url + " 爬取",
                        Create_Time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                        Create_Id   = "pc",
                        Status      = 1,
                        Types       = title
                    });
                    // Log the URL that was actually stored.
                    Console.WriteLine(string.Format("{0} {1} {2}", title, link, name));
                }

                bll.Add(entities);
                Console.WriteLine("" + url + "落抓取写入完成……");
            }
        }

        /// <summary>
        /// Extracts the link URL and display name from one list entry that has already been
        /// split on double quotes.
        /// </summary>
        /// <param name="parts">The entry text split on the double-quote character.</param>
        /// <param name="linkUrl">Receives the URL, or <c>null</c> when none was found.</param>
        /// <param name="linkName">Receives the display name, or <c>null</c> when none was found.</param>
        /// <returns><c>true</c> when a valid URL and a name were extracted.</returns>
        private static bool TryReadH_UILink(string[] parts, out string linkUrl, out string linkName)
        {
            linkUrl  = null;
            linkName = null;

            // First candidate: URL in piece 5, anchor text in piece 6 (or piece 7 when piece 6
            // does not contain the closing tag).
            if (parts.Length > 6 && ReptitleDownload.VerifyURLIsValid(parts[5]))
            {
                if (parts[6].Contains("</a>"))
                {
                    linkName = H_UIAnchorText(parts[6]);
                }
                else if (parts.Length > 7)
                {
                    linkName = parts[7];
                }
                else
                {
                    return false;
                }
                linkUrl = parts[5];
                return true;
            }

            // Second candidate: URL in piece 7, anchor text in piece 8.
            if (parts.Length > 8 && ReptitleDownload.VerifyURLIsValid(parts[7]))
            {
                linkName = parts[8].Contains("</a>") ? H_UIAnchorText(parts[8]) : parts[8];
                linkUrl  = parts[7];
                return true;
            }

            return false;
        }

        /// <summary>
        /// Returns the text in front of <c>&lt;/a&gt;</c> with its first character removed
        /// (presumably the '&gt;' that closes the opening tag - confirm against the page markup).
        /// </summary>
        private static string H_UIAnchorText(string piece)
        {
            var text = piece.Substring(0, piece.IndexOf("</a>", StringComparison.Ordinal));
            return text.Length > 0 ? text.Substring(1) : text;
        }