Ejemplo n.º 1
0
        /// <summary>
        /// Details: builds a rule for a single cnblogs article page, passes it to
        /// <c>ArticleSplider.GetByRule</c>, and writes every field of every returned
        /// item to the console as <c>DisplayName:Value</c>, one field per line.
        /// </summary>
        public void RunArticleDetail()
        {
            var rule = new SpliderRule()
            {
                ContentXPath = "//div[@id='post_detail']",
                EachXPath    = "",
                // The literal previously started with a stray space before the scheme.
                Url          = "https://www.cnblogs.com/fancunwei/p/9581168.html",
                RuleFields   = new List<RuleField>()
                {
                    // "标题" = title
                    new RuleField()
                    {
                        DisplayName = "标题", XPath = "*//div[@class='post']//a[@id='cb_post_title_url']", IsFirstInnerText = true
                    },
                    // "详情" = detail/body
                    new RuleField()
                    {
                        DisplayName = "详情", XPath = "*//div[@class='postBody']//div[@class='blogpost-body']", Attribute = "", IsFirstInnerText = false
                    }
                }
            };
            var splider = new ArticleSplider();
            var list    = splider.GetByRule(rule);

            foreach (var item in list)
            {
                foreach (var field in item.Fields)
                {
                    Console.WriteLine($"{field.DisplayName}:{field.Value}");
                }

                // Blank separator line after each item (the original printed an always-empty string here).
                Console.WriteLine();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs a detail-page rule against a local HTML file via
        /// <c>AppsSplider.GetByRuleFromFile</c>, copies the recognised fields
        /// ("Company", "Description", "URL") onto a single <c>AppInfo</c>, and writes
        /// one console line per returned item.
        /// </summary>
        /// <returns>
        /// The <c>AppInfo</c> populated from the returned fields. When several items
        /// carry the same field, the last one assigned wins.
        /// </returns>
        public AppInfo WandoujiaAppDetailUnitTest()
        {
            var rule = new SpliderRule()
            {
                ContentXPath = "//dl[@class='infos-list']",
                EachXPath    = "",
                Url          = "",
                RuleFields   = new List<RuleField>()
                {
                    new RuleField()
                    {
                        DisplayName = "Company", XPath = "//span[@class='dev-sites']", Attribute = "", IsFirstInnerText = false
                    },
                    //new RuleField(){ DisplayName="Name",XPath="div[@class='app-desc']/h2/a", IsFirstInnerText=true },
                    //new RuleField(){ DisplayName="URL",XPath="a[@class='detail-check-btn']",Attribute="href", IsFirstInnerText=false },
                }
            };
            var splider = new AppsSplider();
            // NOTE(review): absolute, machine-specific path; this only works where that file exists.
            string fileName = @"E:\DevProjects\html-agility-pack\CsharpFanDemo-fork\CsharpFanDemo\Demo.DataSplider\testfiles\detailpage.html";
            List<SpliderContent> list = splider.GetByRuleFromFile(rule, fileName);

            AppInfo appInfo = new AppInfo();

            foreach (var item in list)
            {
                var msg = string.Empty;
                foreach (var field in item.Fields)
                {
                    // Only "Company" is requested by the rule above; the other cases
                    // apply if the commented-out fields are re-enabled.
                    switch (field.DisplayName)
                    {
                        case "Company":
                            appInfo.Company = field.Value;
                            break;

                        case "Description":
                            appInfo.Description = field.Value;
                            break;

                        case "URL":
                            appInfo.URL = field.Value;
                            break;

                        default:
                            break;
                    }
                    msg += $"{field.DisplayName}:{field.Value}     ";
                }
                Console.WriteLine(msg);
            }
            return appInfo;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Weather forecast - 15 days. Builds a rule for the weather.com.cn 15-day page,
        /// passes it to <c>ArticleSplider.GetByRule</c>, and writes one console line per
        /// returned item, made of space-terminated <c>DisplayName:Value</c> pairs.
        /// </summary>
        public void RunWeather15Days()
        {
            // Field names (runtime strings): date, weather, temperature range, wind direction, wind force.
            var forecastFields = new List<RuleField>
            {
                new RuleField { DisplayName = "日期", XPath = "span[@class='time']", IsFirstInnerText = true },
                new RuleField { DisplayName = "天气", XPath = "span[@class='wea']", Attribute = "", IsFirstInnerText = false },
                new RuleField { DisplayName = "区间", XPath = "span[@class='tem']", Attribute = "", IsFirstInnerText = false },
                new RuleField { DisplayName = "风向", XPath = "span[@class='wind']", Attribute = "", IsFirstInnerText = false },
                new RuleField { DisplayName = "风力", XPath = "span[@class='wind1']", Attribute = "", IsFirstInnerText = false },
            };

            var forecastRule = new SpliderRule
            {
                ContentXPath = "//div[@id='15d']",
                EachXPath    = "*//li",
                Url          = "http://www.weather.com.cn/weather15d/101020100.shtml",
                RuleFields   = forecastFields
            };

            var results = new ArticleSplider().GetByRule(forecastRule);

            foreach (var entry in results)
            {
                var line = string.Empty;
                foreach (var field in entry.Fields)
                {
                    line += $"{field.DisplayName}:{field.Value} ";
                }

                Console.WriteLine(line);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Weather forecast - 7 days. Builds a rule for the weather.com.cn 7-day page,
        /// passes it to <c>ArticleSplider.GetByRule</c>, and writes one console line per
        /// returned item, made of space-terminated <c>DisplayName:Value</c> pairs.
        /// </summary>
        public void RunWeather7Days()
        {
            // Field names (runtime strings): date, weather, high, low.
            // Disabled in the original: wind direction and wind force, both read from
            // p[@class='win'] with Attribute = "" and IsFirstInnerText = false.
            var dailyFields = new List<RuleField>
            {
                new RuleField { DisplayName = "日期", XPath = "h1", IsFirstInnerText = true },
                new RuleField { DisplayName = "天气", XPath = "p[@class='wea']", Attribute = "", IsFirstInnerText = true },
                new RuleField { DisplayName = "高", XPath = "p[@class='tem']/span", Attribute = "", IsFirstInnerText = true },
                new RuleField { DisplayName = "低", XPath = "p[@class='tem']/i", Attribute = "", IsFirstInnerText = true },
            };

            var weekRule = new SpliderRule
            {
                // Alternative container tried earlier: "//div[@id='7d']"
                ContentXPath = "//ul[@class='t clearfix']",
                EachXPath    = "li[@class]",
                // Changsha 7-day weather
                Url          = "http://www.weather.com.cn/weather/101250101.shtml",
                RuleFields   = dailyFields
            };

            var results = new ArticleSplider().GetByRule(weekRule);

            foreach (var entry in results)
            {
                var line = string.Empty;
                foreach (var field in entry.Fields)
                {
                    line += $"{field.DisplayName}:{field.Value} ";
                }

                Console.WriteLine(line);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a rule for the cnblogs front-page post list, passes it to
        /// <c>ArticleSplider.GetByRule</c>, and writes one console line per returned
        /// item. The summary field ("简要") and any field whose display name contains
        /// "URL" are requested by the rule but left out of the printed line.
        /// </summary>
        public void RunArticleRule()
        {
            // Shared XPath prefixes, relative to each div[@class='post_item'].
            var postitembodyXPath     = "div[@class='post_item_body']//";
            var postitembodyFootXPath = postitembodyXPath + "div[@class='post_item_foot']//";
            var rule = new SpliderRule()
            {
                ContentXPath = "//div[@id='post_list']",
                EachXPath    = "div[@class='post_item']",
                Url          = "https://www.cnblogs.com",
                RuleFields   = new List<RuleField>()
                {
                    // "推荐" = recommendation (digg) count
                    new RuleField()
                    {
                        DisplayName = "推荐", XPath = "*//span[@class='diggnum']", IsFirstInnerText = true
                    },
                    // "标题" = title
                    new RuleField()
                    {
                        DisplayName = "标题", XPath = postitembodyXPath + "a[@class='titlelnk']", IsFirstInnerText = true
                    },
                    new RuleField()
                    {
                        DisplayName = "URL", XPath = postitembodyXPath + "a[@class='titlelnk']", Attribute = "href", IsFirstInnerText = true
                    },
                    // "简要" = summary
                    new RuleField()
                    {
                        DisplayName = "简要", XPath = postitembodyXPath + "p[@class='post_item_summary']", IsFirstInnerText = true
                    },
                    // "作者" = author
                    new RuleField()
                    {
                        DisplayName = "作者", XPath = postitembodyFootXPath + "a[@class='lightblue']", IsFirstInnerText = true
                    },
                    new RuleField()
                    {
                        DisplayName = "作者URL", XPath = postitembodyFootXPath + "a[@class='lightblue']", Attribute = "href", IsFirstInnerText = true
                    },
                    // "讨论数" = comment count. The XPath previously lacked the footer prefix,
                    // so it only looked at direct children of the post item, unlike the
                    // sibling "阅读数" field below.
                    // NOTE(review): InnerTextRegex presumably strips non-digit characters; confirm in RuleField.
                    new RuleField()
                    {
                        DisplayName = "讨论数", XPath = postitembodyFootXPath + "span[@class='article_comment']", IsFirstInnerText = true, InnerTextRegex = @"[^0-9]+"
                    },
                    // "阅读数" = view count
                    new RuleField()
                    {
                        DisplayName = "阅读数", XPath = postitembodyFootXPath + "span[@class='article_view']", IsFirstInnerText = true, InnerTextRegex = @"[^0-9]+"
                    },
                }
            };
            var splider = new ArticleSplider();
            var list    = splider.GetByRule(rule);

            foreach (var item in list)
            {
                var msg = string.Empty;
                foreach (var field in item.Fields)
                {
                    // Skip the summary and the URL-type fields in the printed line.
                    if (field.DisplayName != "简要" && !field.DisplayName.Contains("URL"))
                    {
                        msg += $"{field.DisplayName}:{field.Value}";
                    }
                }
                Console.WriteLine(msg);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs an app-list rule over the supplied content via
        /// <c>AppsSplider.GetByRule(rule, content)</c> and maps each returned item to
        /// an <c>AppInfo</c> (Id, Name, URL). Also writes one console line per item
        /// listing all of its fields.
        /// </summary>
        /// <param name="content">Markup handed unchanged to <c>AppsSplider.GetByRule</c>.</param>
        /// <returns>A list containing exactly one <c>AppInfo</c> per returned item.</returns>
        public List<AppInfo> WandoujiaAppList(string content)
        {
            var rule = new SpliderRule()
            {
                // Selector used when reading the full web page: "//ul[@id='j-tag-list']"
                // NOTE(review): "/" presumably means `content` is a fragment whose
                // top-level nodes are the <li class='card'> elements; confirm.
                ContentXPath = @"/",
                EachXPath    = "li[@class='card']",
                // Wandoujia app list for one category; left empty because the markup is passed in as `content`.
                Url          = "",
                RuleFields   = new List<RuleField>()
                {
                    new RuleField()
                    {
                        DisplayName = "Id", XPath = "a[@class='detail-check-btn']", Attribute = "data-app-id", IsFirstInnerText = false
                    },
                    new RuleField()
                    {
                        DisplayName = "Name", XPath = "div[@class='app-desc']/h2/a", IsFirstInnerText = true
                    },
                    new RuleField()
                    {
                        DisplayName = "URL", XPath = "a[@class='detail-check-btn']", Attribute = "href", IsFirstInnerText = false
                    },
                    // Disabled in the original: an icon field read from p[@class='tem']/span.
                    new RuleField()
                    {
                        DisplayName = "InstallCount", XPath = "div[@class='app-desc']/div[@class='meta']/span[@class='install-count']", Attribute = "", IsFirstInnerText = true
                    },
                }
            };
            var splider = new AppsSplider();
            List<SpliderContent> list = splider.GetByRule(rule, content);
            List<AppInfo> appList = new List<AppInfo>();

            foreach (var item in list)
            {
                AppInfo appInfo = new AppInfo();
                var msg = string.Empty;
                foreach (var field in item.Fields)
                {
                    // "InstallCount" is printed below but not stored on AppInfo.
                    switch (field.DisplayName)
                    {
                        case "Id":
                            appInfo.Id = field.Value;
                            break;

                        case "Name":
                            appInfo.Name = field.Value;
                            break;

                        case "URL":
                            appInfo.URL = field.Value;
                            break;

                        default:
                            break;
                    }
                    msg += $"{field.DisplayName}:{field.Value}     ";
                }

                // Add once per item. The original added inside the per-field loop,
                // which inserted the same AppInfo once for every field.
                appList.Add(appInfo);

                // Print the item's fields.
                Console.WriteLine(msg);
            }
            return appList;
        }