Esempio n. 1
0
        /// <summary>
        /// Runs the link-validation loop forever. Each pass reads the polling interval from
        /// configuration, loads the active service apps, starts a link check for every enabled
        /// App/Web URL, and then waits for the interval before the next pass.
        /// </summary>
        /// <remarks>
        /// The method stays <c>async void</c> to keep its existing signature, so callers cannot
        /// await it or observe an exception that escapes it.
        /// </remarks>
        public async static void Start()
        {
            while (true)
            {
                // Default interval between passes: 24 hours, in milliseconds.
                var waittime = 24 * 60 * 60 * 1000;

                try
                {
                    using (var db = new BizDataContext())
                    {
                        var temp = await db.Set<T_Configuration>().Where(p => p.Configuration_Key == CommonHelper.LINKKEY).FirstOrDefaultAsync();

                        // Accept only a well-formed, positive interval. A malformed value used to throw
                        // and skip the whole pass; a negative one would make the delay throw or never end.
                        int configured;
                        if (temp != null && Int32.TryParse(temp.Configuration_Value, out configured) && configured > 0)
                        {
                            waittime = configured;
                        }

                        var services = await db.Set<T_HTZ_ServiceApp>().Where(p => p.State == v_common.YesState).ToListAsync();

                        foreach (var item in services)
                        {
                            // The checks are started without being awaited; each one reports its own result.
                            if (item.App_IsEnable ?? false)
                            {
                                LinkValidate(item.HTZ_ServiceApp_Id, item.App_URL, (int)ServiceType.App, item.HTZ_ServiceApp_Name);
                            }

                            if (item.Web_IsEnable ?? false)
                            {
                                LinkValidate(item.HTZ_ServiceApp_Id, item.Web_URL, (int)ServiceType.Web, item.HTZ_ServiceApp_Name);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    var e = new CrawlerException()
                    {
                        crawlertype      = (int)HTZ_ExceptionHandler_ServiceTypeEnum.Service,
                        exceptionbrief   = "应用服务链路异常",
                        exceptionmessage = ex.Message,
                        statuscode       = 501,
                        serviceid        = 1
                    };
                    await CommonHelper.SaveException(e);
                }
                finally
                {
                    // Wait asynchronously instead of blocking a thread for the whole interval.
                    // Fully qualified so the edit does not depend on an extra using directive.
                    await System.Threading.Tasks.Task.Delay(waittime);
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Crawls the detail page of a Qingwang (青网) local information item and hands the
        /// downloaded page source to <c>SaveInfomationDetail</c>.
        /// </summary>
        /// <param name="url">Address of the detail page to download.</param>
        /// <param name="infoNode">XML configuration node forwarded to <c>SaveInfomationDetail</c>.</param>
        /// <param name="info">Information record forwarded to <c>SaveInfomationDetail</c>.</param>
        /// <param name="crawler">Crawler settings; its <c>id</c> is recorded as the service id on errors.</param>
        /// <param name="encode">Encoding name passed to the crawler's <c>Start</c> call.</param>
        public static void InfomationDetailCrawler(string url, XmlNode infoNode, T_Information info, v_crawler crawler, string encode)
        {
            // Single place that shapes the error record; only the brief and the message differ.
            Func<string, string, CrawlerException> buildError = (brief, message) => new CrawlerException()
            {
                crawlertype      = (int)HTZ_ExceptionHandler_ServiceTypeEnum.DataGrab,
                exceptionbrief   = brief,
                exceptionmessage = message,
                statuscode       = 500,
                serviceid        = crawler.id
            };

            // Create a new crawler service for this one page.
            var detailCrawler = new SimpleCrawler();

            // Download failure: raise it as a CrawlerException.
            detailCrawler.OnError += (sender, args) =>
            {
                throw buildError("详情抓取出错", args.Exception.Message);
            };

            // Download finished: parse and store the page; a failure here is saved, not rethrown.
            detailCrawler.OnCompleted += async (sender, args) =>
            {
                try
                {
                    using (var context = new BizDataContext())
                    {
                        await SaveInfomationDetail(args.PageSource, info, infoNode, context, crawler, url);
                    }
                }
                catch (Exception parseError)
                {
                    await CommonHelper.SaveException(buildError("详情解析出错", parseError.Message));
                }
            };

            // Do not use a proxy unless the crawler is being blocked: 60.221.50.118:8090
            var pageUri = new Uri(url);
            detailCrawler.Start(pageUri, encode).Wait();
        }
Esempio n. 3
0
        /// <summary>
        /// Crawls the weather page described by the crawler's XML configuration file and passes
        /// the downloaded page source to <c>SaveWeekData</c>.
        /// </summary>
        /// <param name="crawler">
        /// Crawler settings: <c>xmlfile</c> is the path of the XML configuration to load and
        /// <c>id</c> is recorded as the service id on every reported error.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="crawler"/> is null.</exception>
        /// <exception cref="CrawlerException">
        /// The configuration has no <c>data</c> node or no <c>url</c> attribute, or the crawler's
        /// <c>OnError</c> handler was raised during the download.
        /// </exception>
        public static void WeatherCrawler(v_crawler crawler)
        {
            if (crawler == null)
            {
                throw new ArgumentNullException(nameof(crawler));
            }

            // Shapes every error record of this method; only the brief and the message differ.
            Func<string, string, CrawlerException> failure = (brief, message) => new CrawlerException()
            {
                crawlertype = (int)HTZ_ExceptionHandler_ServiceTypeEnum.DataGrab,
                exceptionbrief = brief,
                exceptionmessage = message,
                statuscode = 500,
                serviceid = crawler.id
            };

            // Load the XML configuration file.
            var cfg = new XmlDocument();
            cfg.Load(crawler.xmlfile);

            var rootNode = cfg.SelectSingleNode("data");
            if (rootNode == null)
            {
                throw failure("配置文件出错", "未找到主配置项data");
            }

            // A missing url attribute is a configuration error, not a NullReferenceException.
            var urlAttribute = rootNode.Attributes["url"];
            if (urlAttribute == null)
            {
                throw failure("配置文件出错", "未找到url配置");
            }

            var tideCrawler = new SimpleCrawler(); // create a new crawler

            // Download failure: raise it as a CrawlerException.
            tideCrawler.OnError += (s, e) =>
            {
                throw failure("抓取出错", e.Exception.Message);
            };

            // Download finished: parse and store the page.
            // NOTE(review): with this (sender, args) signature the async lambda is presumably an
            // async void event handler - confirm against SimpleCrawler. An exception thrown from
            // such a handler cannot be caught by the caller, so a parse failure is saved here.
            tideCrawler.OnCompleted += async (s, e) =>
            {
                try
                {
                    using (var db = new BizDataContext())
                    {
                        await SaveWeekData(e.PageSource, rootNode, db);
                    }
                }
                catch (Exception ex)
                {
                    await CommonHelper.SaveException(failure("解析出错", ex.Message));
                }
            };

            // URL to crawl.
            var url = urlAttribute.Value;

            // Page encoding; defaults to utf-8 when the configuration does not specify one.
            var encode = "utf-8";

            if (rootNode.Attributes["encode"] != null)
            {
                encode = rootNode.Attributes["encode"].Value;
            }

            // Start the crawl and block until it finishes.
            if (!string.IsNullOrEmpty(url))
            {
                // Unlike Wait(), GetAwaiter().GetResult() rethrows the original exception instead of
                // wrapping it in AggregateException, so a CrawlerException that faults the returned
                // task keeps its type for callers that catch CrawlerException.
                tideCrawler.Start(new Uri(url), encode).GetAwaiter().GetResult();
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Runs the weather crawling loop forever. Each pass reads the polling interval and the
        /// enabled weather crawler settings from the database, runs the weather and living-index
        /// crawls, records a "fine" service state, and then waits for the interval.
        /// </summary>
        /// <remarks>
        /// The method stays <c>async void</c> to keep its existing signature, so callers cannot
        /// await it or observe an exception that escapes it.
        /// </remarks>
        public async static void Start()
        {
            while (true)
            {
                // Default interval between passes: 24 hours, in milliseconds.
                var waittime = 24 * 60 * 60 * 1000;
                try
                {
                    v_crawler crawler;

                    using (var db = new BizDataContext())
                    {
                        // Read the crawl interval from the database.
                        var temp = await db.Set<T_Configuration>().Where(p => p.Configuration_Key == CommonHelper.WEATHERKEY).FirstOrDefaultAsync();

                        // Accept only a well-formed, positive interval. A malformed value used to throw
                        // and skip the whole pass; a negative one would make the delay throw or never end.
                        int configured;
                        if (temp != null && Int32.TryParse(temp.Configuration_Value, out configured) && configured > 0)
                        {
                            waittime = configured;
                        }

                        // Read the settings of the enabled weather crawler service.
                        crawler = await db.Set<T_HTZ_CrawlerService>().Where(p => p.State == v_common.YesState && p.ServiceType == (int)HTZ_CrawlerService_ServiceTypeEnum.Weather && p.IsEnable.Value).Select(p => new v_crawler
                        {
                            id = p.HTZ_CrawlerService_Id,
                            infotype = p.InfoType ?? 0,
                            name = p.HTZ_CrawlerService_Name,
                            xmlfile = p.XMLFilePath,
                            crawlertype = (int)HTZ_CrawlerService_ServiceTypeEnum.Weather
                        }).FirstOrDefaultAsync();
                    }

                    // No matching row: report it explicitly instead of letting the crawl fail
                    // later with a NullReferenceException.
                    if (crawler == null)
                    {
                        throw new CrawlerException()
                        {
                            crawlertype = (int)HTZ_ExceptionHandler_ServiceTypeEnum.DataGrab,
                            exceptionbrief = "天气抓取服务错误",
                            exceptionmessage = "未找到已启用的天气抓取服务配置",
                            statuscode = 501,
                            serviceid = 2
                        };
                    }

                    // Crawl the weather data.
                    WeatherCrawler(crawler);

                    // Crawl the living-index data.
                    LivingIndexCrawler(crawler);

                    // Record the service state.
                    await CommonHelper.SaveNewState((int)HTZ_ServiceState_ServiceStateEnum.Fine, crawler.id);
                }
                catch (CrawlerException ex)
                {
                    await CommonHelper.SaveException(ex);
                }
                catch (Exception ex)
                {
                    var e = new CrawlerException()
                    {
                        crawlertype = (int)HTZ_ExceptionHandler_ServiceTypeEnum.DataGrab,
                        exceptionbrief = "天气抓取服务错误",
                        exceptionmessage = ex.Message,
                        statuscode = 501,
                        serviceid = 2
                    };
                    await CommonHelper.SaveException(e);
                }
                finally
                {
                    // Wait asynchronously instead of blocking a thread for the whole interval.
                    // Fully qualified so the edit does not depend on an extra using directive.
                    await System.Threading.Tasks.Task.Delay(waittime);
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Crawls the living-index page described by the crawler's XML configuration file and
        /// passes the downloaded page source to <c>SaveLivingIndexData</c>.
        /// </summary>
        /// <param name="crawler">
        /// Crawler settings: <c>xmlfile</c> is the path of the XML configuration to load and
        /// <c>id</c> is recorded as the service id on every reported error.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="crawler"/> is null.</exception>
        /// <exception cref="CrawlerException">
        /// The configuration has no <c>data</c> node, no <c>LivingIndexConfig</c> node or no
        /// <c>url</c> attribute on it, or the crawler's <c>OnError</c> handler was raised during
        /// the download.
        /// </exception>
        public static void LivingIndexCrawler(v_crawler crawler)
        {
            if (crawler == null)
            {
                throw new ArgumentNullException(nameof(crawler));
            }

            // Shapes every error record of this method; only the brief and the message differ.
            Func<string, string, CrawlerException> failure = (brief, message) => new CrawlerException()
            {
                crawlertype = (int)HTZ_ExceptionHandler_ServiceTypeEnum.DataGrab,
                exceptionbrief = brief,
                exceptionmessage = message,
                statuscode = 500,
                serviceid = crawler.id
            };

            var cfg = new XmlDocument();
            cfg.Load(crawler.xmlfile);

            var rootNode = cfg.SelectSingleNode("data");
            if (rootNode == null)
            {
                throw failure("配置文件出错", "未找到主配置项data");
            }

            // A missing node or attribute is a configuration error, not a NullReferenceException.
            var configNode = rootNode.SelectSingleNode("LivingIndexConfig");
            if (configNode == null)
            {
                throw failure("配置文件出错", "未找到配置项LivingIndexConfig");
            }

            var urlAttribute = configNode.Attributes["url"];
            if (urlAttribute == null)
            {
                throw failure("配置文件出错", "未找到url配置");
            }

            var livingIndexCrawler = new SimpleCrawler(); // create the crawler

            // Download failure: raise it as a CrawlerException.
            livingIndexCrawler.OnError += (s, e) =>
            {
                throw failure("生活指数抓取出错", e.Exception.Message);
            };

            // Download finished: parse and store the page.
            // NOTE(review): with this (sender, args) signature the async lambda is presumably an
            // async void event handler - confirm against SimpleCrawler. An exception thrown from
            // such a handler cannot be caught by the caller, so a parse failure is saved here.
            livingIndexCrawler.OnCompleted += async (s, e) =>
            {
                try
                {
                    using (var db = new BizDataContext())
                    {
                        await SaveLivingIndexData(e.PageSource, rootNode, db);
                    }
                }
                catch (Exception ex)
                {
                    await CommonHelper.SaveException(failure("详情解析出错", ex.Message));
                }
            };

            var url = urlAttribute.Value;

            // Page encoding is read from the root node; defaults to utf-8 when not specified.
            var encode = "utf-8";

            if (rootNode.Attributes["encode"] != null)
            {
                encode = rootNode.Attributes["encode"].Value;
            }

            if (!string.IsNullOrEmpty(url))
            {
                // Unlike Wait(), GetAwaiter().GetResult() rethrows the original exception instead of
                // wrapping it in AggregateException, so a CrawlerException that faults the returned
                // task keeps its type for callers that catch CrawlerException.
                livingIndexCrawler.Start(new Uri(url), encode).GetAwaiter().GetResult();
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Crawls a news listing page on a background task: loads the crawler's XML configuration,
        /// downloads the configured URL and hands the page source to <c>SaveInfomation</c>.
        /// Failures are saved through <c>SaveException</c> rather than propagated.
        /// </summary>
        /// <param name="crawler">
        /// Crawler settings: <c>xmlfile</c> is the path of the XML configuration to load and
        /// <c>id</c> is recorded on reported errors and state updates. Its <c>encode</c> and
        /// <c>url</c> members are overwritten from the configuration.
        /// </param>
        public async static void InfomationCrawler(v_crawler crawler)
        {
            await Task.Run(async() =>
            {
                try
                {
                    var xmlDocument = new XmlDocument();
                    xmlDocument.Load(crawler.xmlfile);

                    var rootNode = xmlDocument.SelectSingleNode("data");
                    if (rootNode == null)
                    {
                        throw new CrawlerException(crawler.id, "配置文件错误", "未找到主配置项data");
                    }

                    crawler.encode = DefaultEncode;

                    if (rootNode.Attributes["encode"] != null)
                    {
                        crawler.encode = rootNode.Attributes["encode"].Value;
                    }

                    // A missing url attribute is a configuration error, not a NullReferenceException.
                    var urlAttribute = rootNode.Attributes["url"];
                    if (urlAttribute == null)
                    {
                        throw new CrawlerException(crawler.id, "配置文件错误", "未找到url配置");
                    }

                    crawler.url = urlAttribute.Value;

                    var infoCrawler = new SimpleCrawler(); // create a new crawl service

                    // Download failure: raise it as a CrawlerException.
                    infoCrawler.OnError += (s, e) =>
                    {
                        throw new CrawlerException(crawler.id, "获取页面代码时错误", e.Exception.Message);
                    };

                    // Download finished: store the parsed page and mark the service as fine;
                    // a failure here is saved, not rethrown.
                    infoCrawler.OnCompleted += async(s, e) =>
                    {
                        try
                        {
                            using (var db = new BizDataContext())
                            {
                                SaveInfomation(e.PageSource, crawler, xmlDocument, db);

                                await CommonHelper.SaveNewState((int)HTZ_ServiceState_ServiceStateEnum.Fine, crawler.id);
                            }
                        }
                        catch (Exception ex)
                        {
                            var messageException = new CrawlerException(crawler.id, "解析出错", ex.Message);
                            await SaveException(messageException);
                        }
                    };

                    if (!string.IsNullOrEmpty(crawler.url))
                    {
                        // Await instead of Wait(): it does not block the pool thread, and it rethrows
                        // the original exception rather than an AggregateException, so a CrawlerException
                        // that faults the task is handled by the CrawlerException catch below.
                        await infoCrawler.Start(new Uri(crawler.url), crawler.encode);
                    }
                }
                catch (CrawlerException ex)
                {
                    await CommonHelper.SaveException(ex);
                }
                catch (Exception ex)
                {
                    var e = new CrawlerException()
                    {
                        crawlertype      = (int)HTZ_ExceptionHandler_ServiceTypeEnum.DataGrab,
                        exceptionbrief   = "信息抓取服务错误",
                        exceptionmessage = ex.Message,
                        statuscode       = 501,
                        serviceid        = 2
                    };
                    await CommonHelper.SaveException(e);
                }
            });
        }
Esempio n. 7
0
        /// <summary>
        /// Requests a service URL once and records the outcome: a "fine" state with the response
        /// time when the status is 200 OK, otherwise an exception record describing the failure.
        /// </summary>
        /// <param name="id">Service id stored on the state or exception record.</param>
        /// <param name="url">Address to request.</param>
        /// <param name="serviceType">Service type value stored on the record (callers pass a <c>ServiceType</c> cast to int).</param>
        /// <param name="serviceName">Service name stored on exception records.</param>
        private static async void LinkValidate(int id, string url, int serviceType, string serviceName)
        {
            // Record used for any unexpected exception. It now carries serverAppType as well,
            // matching the other exception records built in this method.
            Func<Exception, CrawlerException> unexpected = error => new CrawlerException()
            {
                crawlertype      = (int)HTZ_ExceptionHandler_ServiceTypeEnum.Service,
                exceptionbrief   = "应用服务链路异常",
                exceptionmessage = error.Message,
                statuscode       = 501,
                serviceid        = id,
                servicename      = serviceName,
                serverAppType    = serviceType
            };

            try
            {
                var app = new LinkService();

                // NOTE(review): with this (sender, args) signature the async lambda is presumably an
                // async void event handler - confirm against LinkService. An exception thrown from
                // such a handler would bypass the try/catch around BeginRequest, so the handler
                // saves its own failures instead of throwing.
                app.OnCompleted += (async(s, e) =>
                {
                    try
                    {
                        if (e.StatusCode == HttpStatusCode.OK)
                        {
                            await CommonHelper.SaveNewState((int)HTZ_ServiceState_ServiceStateEnum.Fine, id, (int)e.Milliseconds, serviceType);
                            return;
                        }

                        // Any status other than 200 OK is stored as a link failure.
                        var statusFailure = new CrawlerException()
                        {
                            crawlertype = (int)HTZ_ExceptionHandler_ServiceTypeEnum.Service,
                            exceptionbrief = EnumHelper.GetDescription(e.StatusCode),
                            exceptionmessage = EnumHelper.GetDescription(e.StatusCode),
                            statuscode = (int)e.StatusCode,
                            serviceid = id,
                            servicename = serviceName,
                            serverAppType = serviceType
                        };
                        await CommonHelper.SaveException(statusFailure);
                    }
                    catch (Exception ex)
                    {
                        await CommonHelper.SaveException(unexpected(ex));
                    }
                });

                // Request failure: raise it as a CrawlerException.
                app.OnError += ((s, e) =>
                {
                    var ex = new CrawlerException()
                    {
                        crawlertype = (int)HTZ_ExceptionHandler_ServiceTypeEnum.Service,
                        exceptionbrief = "请求时出错",
                        exceptionmessage = e.Exception.Message,
                        statuscode = (int)HttpStatusCode.InternalServerError,
                        serviceid = id,
                        servicename = serviceName,
                        serverAppType = serviceType,
                    };
                    throw ex;
                });

                await app.BeginRequest(new Uri(url));
            }
            catch (CrawlerException ex)
            {
                await CommonHelper.SaveException(ex);
            }
            catch (Exception ex)
            {
                await CommonHelper.SaveException(unexpected(ex));
            }
        }