Exemple #1
0
        /// <summary>
        /// Configures and starts a crawler named "quanmin".
        /// The scan page is searched for a "total" value that is used as the number of list
        /// pages to queue; each list (helper) page is searched for numeric "evtname" values
        /// that are queued as further URLs; pages of type <c>ContextUrl</c> are trimmed down
        /// to an embedded JSON fragment before the configured JsonPath fields are applied.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var config = new Config
            {
                Name     = "quanmin",
                ScanUrls = "https://www.quanmin.tv/game/all",
                Fields   = new[]
                {
                    new Field
                    {
                        Name         = "title",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.title"
                    },
                    new Field
                    {
                        Name         = "username",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.nick"
                    },
                    new Field
                    {
                        Name         = "online",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.view",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "fanscount",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.follow",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "cate",
                        Selector     = "$.category_name",
                        Selectortype = SelectorType.JsonPath
                    }
                },
                RepeatWhen = RepeatWhenEver.hour,
                RepeatAt   = new TimeSpan(0, 5, 0),
            };

            crawler = new CrawlerDotNet.Core.Crawler();

            // This sample keeps no per-run state, so the hook has nothing to reset.
            // NOTE(review): the hook is still assigned in case the crawler invokes it
            // without a null check - confirm before removing it entirely.
            crawler.BeforeCrawl = () =>
            {
            };

            crawler.Downloader.AfterDownloadPage = p =>
            {
                if (p.Request.Type != PageType.ContextUrl)
                {
                    return;
                }

                // Process the page: keep only the fragment that starts with {"uid and
                // ends with "ignore_ad":true}. When the pattern is not found, Match.Value
                // is the empty string, so Html becomes empty.
                var fragmentPattern = new Regex("{\"uid([\\s\\S]*)\"ignore_ad\":true}");
                var fragment        = fragmentPattern.Match(p.Html);
                p.Html = fragment.Value;
            };

            crawler.Processor.OnProcessScanPage = p =>
            {
                var totalPattern = new Regex("total:([0-9]*),");
                var totalMatch   = totalPattern.Match(p.Html.Replace(" ", string.Empty));

                // Get the page count. The capture group is empty when the pattern is
                // missing or has no digits, so parse defensively instead of letting
                // int.Parse throw and abort the handler.
                int pageCount;
                if (totalMatch.Success && int.TryParse(totalMatch.Groups[1].Value, out pageCount))
                {
                    for (int i = 1; i <= pageCount; i++)
                    {
                        crawler.Schduler.AddUrl($"https://www.quanmin.tv/game/all?p={i}", PageType.HelperUrl);
                    }
                }
                else
                {
                    Console.Error.WriteLine("quanmin: could not read the page total from the scan page; no list pages were queued.");
                }

                p.SkipExtract();
            };
            crawler.Processor.OnProcessHelperPage = p =>
            {
                // Every numeric "evtname" value found on a list page is appended to the
                // site root and queued.
                var roomPattern = new Regex("\"evtname\":\"([0-9]*)\"");

                foreach (Match room in roomPattern.Matches(p.Html))
                {
                    crawler.Schduler.AddUrl("https://www.quanmin.tv/" + room.Groups[1].Value);
                }

                p.SkipExtract();
            };

            crawler.Setup(config);
            crawler.Start();
            Console.WriteLine("end");
            Console.ReadKey();
        }
Exemple #2
0
        /// <summary>
        /// Configures and starts a crawler named "longzhu" against a paged JSON stream API.
        /// The scan page supplies the total item count, from which follow-up page URLs are
        /// queued; each downloaded page's <c>data.items</c> array is turned into one
        /// <c>ExtractResults</c> per item using the configured JsonPath selectors.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // One page size for the seed URL, the page-count arithmetic and the follow-up
            // URLs. The follow-up URLs previously asked for 200 results while stepping
            // start-index by 50, so consecutive requests overlapped.
            const int pageSize = 50;

            var config = new Config
            {
                Name     = "longzhu",
                ScanUrls = $"http://api.plu.cn/tga/streams?max-results={pageSize}&start-index=0&sort-by=views&filter=0&game=0",
                Fields   = new[]
                {
                    new Field
                    {
                        Name         = "title",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.channel.status"
                    },
                    new Field
                    {
                        Name         = "username",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.channel.name"
                    },
                    new Field
                    {
                        Name         = "online",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.viewers",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "fanscount",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.channel.followers",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "cate",
                        Selector     = "$.game[0].name",
                        Selectortype = SelectorType.JsonPath
                    }
                },
                RepeatWhen = RepeatWhenEver.hour,
                RepeatAt   = new TimeSpan(0, 25, 0),
            };

            crawler = new Crawler();

            // Intentionally empty: the downloaded JSON is used as-is.
            crawler.Downloader.AfterDownloadPage = p =>
            {
            };
            crawler.Processor.OnCustomExtract = p =>
            {
                var root  = JObject.Parse(p.Html);
                var items = root.SelectToken("$.data.items") as JArray;
                if (items == null)
                {
                    // No item array in this response: nothing to extract.
                    return;
                }

                foreach (var item in items)
                {
                    var row = new ExtractResults();
                    foreach (var f in config.Fields)
                    {
                        // SelectToken returns null when the path is absent (e.g. an
                        // empty "game" array); record an empty value rather than
                        // throwing and losing the whole page.
                        // NOTE(review): confirm how empty values are handled for Int fields.
                        var token = item.SelectToken(f.Selector);
                        row.Add(new Result(f.Name, token == null ? string.Empty : token.ToString()));
                    }
                    p.Results.Add(row);
                }
            };
            crawler.Processor.OnProcessScanPage = p =>
            {
                var totalText = p.GetJson("$.data.totalItems");

                int total;
                if (!int.TryParse(totalText, out total))
                {
                    Console.Error.WriteLine("longzhu: could not read data.totalItems from the scan page; no further pages were queued.");
                    return;
                }

                // Integer division plus one always rounds up, so the last queued
                // start-index can lie past the final item.
                var pageCount = total / pageSize + 1;

                for (int i = 1; i <= pageCount; i++)
                {
                    crawler.Schduler.AddUrl($"http://api.plu.cn/tga/streams?max-results={pageSize}&start-index={i * pageSize}&sort-by=views&filter=0&game=0");
                }
            };
            crawler.Setup(config);
            crawler.Start();
            Console.WriteLine("end");
            Console.ReadKey();
        }
Exemple #3
0
        /// <summary>
        /// Configures and starts a crawler named "chushou" that follows a breakpoint-based
        /// JSON feed. Each scan page's <c>data.breakpoint</c> value is used to queue the
        /// next URL, and each page's <c>data.items</c> array is turned into one
        /// <c>ExtractResults</c> per item using the configured JsonPath selectors.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var config = new Config
            {
                Name     = "chushou",
                ScanUrls = "https://chushou.tv/live/down-v2.htm",
                Fields   = new[]
                {
                    new Field
                    {
                        Name         = "title",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.name"
                    },
                    new Field
                    {
                        Name         = "username",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.meta.creator"
                    },
                    new Field
                    {
                        Name         = "online",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.meta.onlineCount",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "fanscount",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.meta.subscriberCount",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "cate",
                        Selector     = "$.meta.gameName",
                        Selectortype = SelectorType.JsonPath
                    }
                },
                RepeatWhen = RepeatWhenEver.hour,
                RepeatAt   = new TimeSpan(0, 35, 0),
            };

            crawler = new CrawlerDotNet.Core.Crawler();

            // Breakpoint seen on the previous scan page; captured by the handler below.
            string lastpoint = "";

            crawler.Processor.OnProcessScanPage = p =>
            {
                var point = p.GetJson("$.data.breakpoint");

                // While the breakpoint keeps changing, the next URL is queued as another
                // scan page; once it repeats, the URL is queued as a ContextUrl instead,
                // which stops this handler from chaining further.
                var nextType = point != lastpoint ? PageType.ScanUrl : PageType.ContextUrl;
                crawler.Schduler.AddUrl("https://chushou.tv/live/down-v2.htm?&breakpoint=" + point, nextType);
                lastpoint = point;
            };
            crawler.Processor.OnCustomExtract = p =>
            {
                var root  = JObject.Parse(p.Html);
                var items = root.SelectToken("$.data.items") as JArray;
                if (items == null)
                {
                    // No item array in this response: nothing to extract.
                    return;
                }

                foreach (var item in items)
                {
                    var row = new ExtractResults();
                    foreach (var f in config.Fields)
                    {
                        // SelectToken returns null when the path is absent; record an
                        // empty value rather than throwing and losing the whole page.
                        // NOTE(review): confirm how empty values are handled for Int fields.
                        var token = item.SelectToken(f.Selector);
                        row.Add(new Result(f.Name, token == null ? string.Empty : token.ToString()));
                    }
                    p.Results.Add(row);
                }
            };
            crawler.Setup(config);
            crawler.Start();
            Console.WriteLine("end");
            Console.ReadKey();
        }
Exemple #4
0
        /// <summary>
        /// Configures and starts a crawler named "douyu". The scan page is searched for a
        /// "count" value that is used as the number of AJAX list pages to queue; every
        /// <c>href="..." title=</c> target found on a list (helper) page is appended to the
        /// room API base URL and queued; fields are then selected with JsonPath from the
        /// <c>data</c> object.
        /// </summary>
        static void douyuSample()
        {
            //https://www.douyu.com/directory/all
            //https://www.douyu.com/directory/all?page=1&isAjax=1
            //http://open.douyucdn.cn/api/RoomApi/room/
            #region config
            var c = new Config
            {
                Name     = "douyu",
                ScanUrls = "https://www.douyu.com/directory/all",

                ContentUrlRegexes = new Regex("room"),
                HelperUrlRegexes  = new Regex("page"),

                Fields = new[]
                {
                    new Field
                    {
                        Name         = "title",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.data.room_name"
                    }, new Field
                    {
                        Name         = "username",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.data.owner_name"
                    }, new Field
                    {
                        Name         = "online",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.data.online",
                        Type         = FieldType.Int,
                    }, new Field
                    {
                        Name         = "fanscount",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.data.fans_num",
                        Type         = FieldType.Int,
                    }, new Field
                    {
                        Name         = "cate",
                        Selector     = "$.data.cate_name",
                        Selectortype = SelectorType.JsonPath
                    }, new Field
                    {
                        Name         = "startat",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.data.start_time",
                        Type         = FieldType.String,
                    },
                },
                RepeatWhen = RepeatWhenEver.hour,
                RepeatAt   = new TimeSpan(0, 20, 0),
            };
            #endregion
            douyu = new CrawlerDotNet.Core.Crawler();

            douyu.Setup(c);
            douyu.Processor.OnProcessHelperPage = p =>
            {
                // Lazy capture: the shortest text between href=" and the following
                // " title= marker.
                var linkPattern = new Regex("href=\"([\\s\\S]*?)\" title=");
                foreach (Match link in linkPattern.Matches(p.Html))
                {
                    douyu.Schduler.AddUrl("http://open.douyucdn.cn/api/RoomApi/room/" + link.Groups[1].Value, p.Request.Deth + 1);
                }

                p.SkipExtract();
            };
            douyu.Processor.OnProcessScanPage = p =>
            {
                // Lazy quantifier: stop at the first comma after "count:". The greedy
                // form ran to the last comma on the line, which made the parse fail
                // whenever anything else followed the count on that line.
                var countPattern = new Regex(@"count:(.+?),");
                var countMatch   = countPattern.Match(p.Html);

                // The value may be quoted on the page, so quotes are stripped before parsing.
                int count;
                if (countMatch.Success &&
                    int.TryParse(countMatch.Groups[1].Value.Replace("\"", string.Empty), out count))
                {
                    for (int page = 1; page <= count; page++)
                    {
                        douyu.Schduler.AddUrl($"https://www.douyu.com/directory/all?page={page}&isAjax=1", PageType.HelperUrl, p.Request.Deth + 1);
                    }
                }
                else
                {
                    Console.Error.WriteLine("douyu: could not read the page count from the scan page; no list pages were queued.");
                }

                p.SkipExtract();
            };
            douyu.Start();
        }
Exemple #5
0
        /// <summary>
        /// Configures and starts a crawler named "zhanqi" over numbered JSON list pages.
        /// After each download, a non-empty <c>data.rooms</c> value causes the next numbered
        /// page to be queued. Each room in <c>data.rooms</c> becomes one
        /// <c>ExtractResults</c>; the "fanscount" field is not in the list JSON, so the
        /// room's own page is downloaded and the field's regex is applied to it.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var config = new Config
            {
                Name     = "zhanqi",
                ScanUrls = "http://www.zhanqi.tv/api/static/v2.1/live/list/200/1.json",
                Fields   = new[]
                {
                    new Field
                    {
                        Name         = "title",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.title"
                    },
                    new Field
                    {
                        Name         = "username",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.nickname"
                    },
                    new Field
                    {
                        Name         = "online",
                        Selectortype = SelectorType.JsonPath,
                        Selector     = "$.online",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        // XPath of the same element, kept for reference:
                        // //*[@id="js-room-anchor-info-area"]/div[2]/div[1]/div/span[1]
                        Name         = "fanscount",
                        Selectortype = SelectorType.Regex,
                        Selector     = "js-room-follow-num\">([0-9]*)<",
                        Type         = FieldType.Int,
                    },
                    new Field
                    {
                        Name         = "cate",
                        Selector     = "$.newGameName",
                        Selectortype = SelectorType.JsonPath
                    },
                    new Field
                    {
                        Name         = "childcate",
                        Selector     = "$.gameName",
                        Selectortype = SelectorType.JsonPath
                    }
                },
                RepeatWhen = RepeatWhenEver.hour,
                RepeatAt   = new TimeSpan(0, 10, 0),
            };

            crawler = new CrawlerDotNet.Core.Crawler();

            // Number of the most recently queued list page; reset before every crawl run.
            var curPage = 1;

            crawler.BeforeCrawl = () =>
            {
                curPage = 1;
            };

            crawler.Downloader.AfterDownloadPage = p =>
            {
                // Is there data? If so, queue the next JSON page.
                var rooms = p.GetJson("$.data.rooms");

                // A missing value must not count as "has data": comparing only against
                // "[]" would treat null/empty as non-empty and keep queueing pages.
                // NOTE(review): assumes GetJson yields null or "" when the path is absent - confirm.
                if (!string.IsNullOrEmpty(rooms) && rooms != "[]")
                {
                    curPage++;
                    crawler.Schduler.AddUrl($"http://www.zhanqi.tv/api/static/v2.1/live/list/200/{curPage}.json");
                }
            };
            crawler.Processor.OnCustomExtract = p =>
            {
                var root  = JObject.Parse(p.Html);
                var rooms = root.SelectToken("$.data.rooms") as JArray;
                if (rooms == null)
                {
                    // No room array in this response: nothing to extract.
                    return;
                }

                foreach (var info in rooms)
                {
                    var exres = new ExtractResults();
                    foreach (var f in config.Fields)
                    {
                        if (f.Name == "fanscount")
                        {
                            // Request the room page to read the follower count.
                            var html    = string.Empty;
                            var roomUrl = info.SelectToken("$.url");
                            if (roomUrl != null)
                            {
                                var fanspage = crawler.Downloader.DownloaderOnly(new Request(crawler.Schduler)
                                {
                                    Url =
                                        "https://www.zhanqi.tv" +
                                        roomUrl.ToString()
                                });
                                if (fanspage != null)
                                {
                                    html = fanspage.Html;
                                }
                            }

                            // Fall back to "0" when the room has no URL, the page could
                            // not be fetched, or the regex found nothing.
                            Result fans;
                            if (string.IsNullOrEmpty(html))
                            {
                                fans = new Result(f.Name, "0");
                            }
                            else
                            {
                                fans = BaseProcessor.DoRegex(html, f);
                                if (string.IsNullOrEmpty(fans.Value))
                                {
                                    fans.Value = "0";
                                }
                            }
                            exres.Add(fans);

                            continue;
                        }

                        // SelectToken returns null when the path is absent; record an
                        // empty value rather than throwing and losing the whole page.
                        // NOTE(review): confirm how empty values are handled for Int fields.
                        var token = info.SelectToken(f.Selector);
                        exres.Add(new Result(f.Name, token == null ? string.Empty : token.ToString()));
                    }
                    p.Results.Add(exres);
                }
            };
            crawler.Setup(config);
            crawler.Start();

            Console.WriteLine("end");
            Console.ReadKey();
        }