Beispiel #1
0
        /// <summary>
        /// Begin spider
        /// </summary>
        private static void Run()
        {
            //Add other areaInfo
            Dictionary <string, string> areaDic = DazhongdianpingArea.GetAreaDic();
            List <string> urls = new List <string>();

            foreach (var key in areaDic.Keys)
            {
                for (int i = 1; i <= 50; i++)
                {
                    urls.Add(string.Format(urlAreaInfo, key, i));
                }
            }
            RunSpider runSpiders = new RunSpider("DazhongdianpingProcessor", "DazhongdianpingPipeline", "UTF-8", true);

            runSpiders.Run(urls);

            //RunSpider runSpider = new RunSpider("DazhongdianpingProcessor", "DazhongdianpingPipeline", "UTF-8", true);
            //runSpider.Run(urlInfo, 50);
        }
        /// <summary>
        /// 重新父类方法,开始执行数据获取操作
        /// </summary>
        /// <param name="page"></param>
        protected override void Handle(Page page)
        {
            // 利用 Selectable 查询并构造自己想要的数据对象
            var totalVideoElements = page.Selectable.SelectList(Selectors.XPath(".//div[@class='shop-list J_shop-list shop-all-list']/ul/li")).Nodes();

            if (totalVideoElements == null)
            {
                return;
            }
            //定义需处理数据集合
            List <Restaurant> restaurantList = new List <Restaurant>();

            foreach (var restElement in totalVideoElements)
            {
                var restaurant = new Restaurant()
                {
                    SourceWebsite = SourceWebsite
                };
                //下面通过xpath开始获取餐厅信息
                restaurant.Name = restElement.Select(Selectors.XPath(".//h4")).GetValue();
                var price = restElement.Select(Selectors.XPath(".//div[@class='txt']/div/a[@class='mean-price']/b")).GetValue();
                restaurant.AveragePrice = string.IsNullOrEmpty(price) ? "0" : price.Replace("¥", "");
                restaurant.Type         = restElement.Select(Selectors.XPath(".//div[@class='txt']/div[@class='tag-addr']/a/span[@class='tag']")).GetValue();
                restaurant.Star         = restElement.Select(Selectors.XPath(".//div[@class='txt']/div[@class='comment']/span/@title")).GetValue();
                restaurant.ImageUrl     = restElement.Select(Selectors.XPath(".//div[@class='pic']/a/img/@src")).GetValue();
                var areaCode = page.Url.Substring(page.Url.LastIndexOf('/') + 1);
                if (!string.IsNullOrEmpty(areaCode) && (areaCode.Contains("r") || areaCode.Contains("c")))
                {
                    Dictionary <string, string> areaDic = DazhongdianpingArea.GetAreaDic();
                    string result = areaCode.Substring(0, areaCode.IndexOf('p'));
                    if (areaDic.ContainsKey(result))
                    {
                        restaurant.Area = areaDic[result];
                    }
                }

                List <ISelectable> infoList = restElement.SelectList(Selectors.XPath("./div[@class='txt']/span[@class='comment-list']/span/b")).Nodes() as List <ISelectable>;
                if (infoList != null && infoList.Count > 0)
                {
                    var result = infoList[0].GetValue();
                    restaurant.Taste        = string.IsNullOrEmpty(result) ? string.Empty : result;
                    result                  = infoList[1].GetValue();
                    restaurant.Environment  = string.IsNullOrEmpty(result) ? string.Empty : result;
                    result                  = infoList[2].GetValue();
                    restaurant.ServiceScore = string.IsNullOrEmpty(result) ? string.Empty : result;
                }

                var recommetList = restElement.SelectList(Selectors.XPath(".//div[@class='txt']/div[@class='recommend']/a")).Nodes();
                restaurant.Recommendation = string.Join(",", recommetList.Select(o => o.GetValue()));
                restaurant.Address        = restElement.Select(Selectors.XPath(".//div[@class='txt']/div[@class='tag-addr']/span")).GetValue();
                restaurant.Position       = restElement.Select(Selectors.XPath(".//div[@class='txt']/div[@class='tag-addr']/a[@data-click-name='shop_tag_region_click']/span[@class='tag']")).GetValue();

                var shopUrl = restElement.Select(Selectors.XPath(".//div[@class='txt']/div/a/@href")).GetValue();
                restaurant.Code = shopUrl.Substring(shopUrl.LastIndexOf('/') + 1);
                restaurantList.Add(restaurant);

                //add next links
                if (!string.IsNullOrEmpty(shopUrl))
                {
                    this.foodUrls.Add(shopUrl);
                }
            }
            // 如果进行二级爬虫,取消注释,并且实现对应的两个类
            //InvokeCallback("DazhongdianpingFoodProcessor", "DazhongdianpingFoodPipeline");
            // Save data object by key. 以自定义KEY存入page对象中供Pipeline调用
            page.AddResultItem("RestaurantList", restaurantList);
        }