// Sample entry point for the "quanmin" crawler.
//
// Flow, as wired below:
//   1. The scan page (ScanUrls) is searched for "total:<n>," and one helper
//      URL per listing page 1..n is queued.
//   2. Each helper page is searched for "evtname":"<digits>" and one room URL
//      per hit is queued.
//   3. After a page whose request type is PageType.ContextUrl is downloaded,
//      its Html is replaced by the embedded JSON fragment, which the JsonPath
//      fields in the config are then applied to.
// Blocks on Console.ReadKey() after crawler.Start() returns.
static void Main(string[] args)
{
    var config = new Config
    {
        Name = "quanmin",
        ScanUrls = "https://www.quanmin.tv/game/all",
        Fields = new[]
        {
            new Field { Name = "title", Selectortype = SelectorType.JsonPath, Selector = "$.title" },
            new Field { Name = "username", Selectortype = SelectorType.JsonPath, Selector = "$.nick" },
            new Field { Name = "online", Selectortype = SelectorType.JsonPath, Selector = "$.view", Type = FieldType.Int, },
            new Field { Name = "fanscount", Selectortype = SelectorType.JsonPath, Selector = "$.follow", Type = FieldType.Int, },
            new Field { Name = "cate", Selector = "$.category_name", Selectortype = SelectorType.JsonPath }
        },
        RepeatWhen = RepeatWhenEver.hour,
        RepeatAt = new TimeSpan(0, 5, 0),
    };

    // Built once and captured by the callbacks instead of being re-created on every page.
    var roomJsonRegex = new Regex("{\"uid([\\s\\S]*)\"ignore_ad\":true}");
    var totalPagesRegex = new Regex("total:([0-9]*),");
    var roomIdRegex = new Regex("\"evtname\":\"([0-9]*)\"");

    crawler = new CrawlerDotNet.Core.Crawler();

    // This sample keeps no per-run state. The hook stays assigned (as in the
    // original sample) in case the crawler invokes it unconditionally.
    crawler.BeforeCrawl = () => { };

    crawler.Downloader.AfterDownloadPage = p =>
    {
        if (p.Request.Type != PageType.ContextUrl)
        {
            return;
        }

        // Process the page: keep only the embedded room JSON.
        // When the pattern is not found, Match.Value is the empty string.
        p.Html = roomJsonRegex.Match(p.Html).Value;
    };

    crawler.Processor.OnProcessScanPage = p =>
    {
        var m = totalPagesRegex.Match(p.Html.Replace(" ", string.Empty));

        // Get the page count. The capture group may be empty ("[0-9]*") or the
        // pattern may be missing altogether, so never feed it to int.Parse blindly.
        int pageCount;
        if (!m.Success || !int.TryParse(m.Groups[1].Value, out pageCount))
        {
            Console.WriteLine("quanmin: page total not found on the scan page; no listing pages queued");
            p.SkipExtract();
            return;
        }

        for (int page = 1; page <= pageCount; page++)
        {
            crawler.Schduler.AddUrl($"https://www.quanmin.tv/game/all?p={page}", PageType.HelperUrl);
        }

        p.SkipExtract();
    };

    crawler.Processor.OnProcessHelperPage = p =>
    {
        foreach (Match m in roomIdRegex.Matches(p.Html))
        {
            var roomId = m.Groups[1].Value;
            if (roomId.Length == 0)
            {
                // An empty id would queue the bare site root instead of a room page.
                continue;
            }

            crawler.Schduler.AddUrl("https://www.quanmin.tv/" + roomId);
        }

        p.SkipExtract();
    };

    crawler.Setup(config);
    crawler.Start();

    Console.WriteLine("end");
    Console.ReadKey();
}
// Sample entry point for the "longzhu" crawler.
//
// The scan URL is the first window (start-index=0) of the streams API. Its
// "$.data.totalItems" value decides which further windows are queued, and
// every downloaded page is turned into one ExtractResults per element of
// "$.data.items" by the custom extractor.
// Blocks on Console.ReadKey() after crawler.Start() returns.
static void Main(string[] args)
{
    // One page size for the window step AND the max-results query value.
    // Requesting 200 items while advancing by 50 made consecutive windows overlap.
    const int pageSize = 50;

    var config = new Config
    {
        Name = "longzhu",
        ScanUrls = "http://api.plu.cn/tga/streams?max-results=50&start-index=0&sort-by=views&filter=0&game=0",
        Fields = new[]
        {
            new Field { Name = "title", Selectortype = SelectorType.JsonPath, Selector = "$.channel.status" },
            new Field { Name = "username", Selectortype = SelectorType.JsonPath, Selector = "$.channel.name" },
            new Field { Name = "online", Selectortype = SelectorType.JsonPath, Selector = "$.viewers", Type = FieldType.Int, },
            new Field { Name = "fanscount", Selectortype = SelectorType.JsonPath, Selector = "$.channel.followers", Type = FieldType.Int, },
            new Field { Name = "cate", Selector = "$.game[0].name", Selectortype = SelectorType.JsonPath }
        },
        RepeatWhen = RepeatWhenEver.hour,
        RepeatAt = new TimeSpan(0, 25, 0),
    };

    crawler = new Crawler();

    // Intentionally empty hook, kept assigned as in the original sample.
    crawler.Downloader.AfterDownloadPage = p => { };

    crawler.Processor.OnCustomExtract = p =>
    {
        var items = JObject.Parse(p.Html).SelectToken("$.data.items") as JArray;
        if (items == null)
        {
            // Nothing to extract when the payload has no "data.items" array.
            return;
        }

        foreach (var info in items)
        {
            var exres = new ExtractResults();
            foreach (var f in config.Fields)
            {
                // SelectToken returns null when the path is absent (e.g. an empty
                // "game" array); fall back to a default instead of dereferencing null.
                var token = info.SelectToken(f.Selector);
                var value = token != null
                    ? token.ToString()
                    : (f.Type == FieldType.Int ? "0" : string.Empty);
                exres.Add(new Result(f.Name, value));
            }

            p.Results.Add(exres);
        }
    };

    crawler.Processor.OnProcessScanPage = p =>
    {
        int total;
        if (!int.TryParse(p.GetJson("$.data.totalItems"), out total))
        {
            Console.WriteLine("longzhu: $.data.totalItems is missing or not a number; no further pages queued");
            return;
        }

        // The scan page already covers start-index 0, so continue from the
        // second window and stop before running past the reported total.
        for (int start = pageSize; start < total; start += pageSize)
        {
            crawler.Schduler.AddUrl($"http://api.plu.cn/tga/streams?max-results={pageSize}&start-index={start}&sort-by=views&filter=0&game=0");
        }
    };

    crawler.Setup(config);
    crawler.Start();

    Console.WriteLine("end");
    Console.ReadKey();
}
// Sample entry point for the "chushou" crawler.
//
// Each scan page exposes "$.data.breakpoint"; the next URL is built from it.
// While the breakpoint keeps changing the next URL is queued as a scan URL,
// and once it repeats the URL is queued as PageType.ContextUrl instead.
// Every element of "$.data.items" becomes one ExtractResults.
// Blocks on Console.ReadKey() after crawler.Start() returns.
static void Main(string[] args)
{
    var config = new Config
    {
        Name = "chushou",
        ScanUrls = "https://chushou.tv/live/down-v2.htm",
        Fields = new[]
        {
            new Field { Name = "title", Selectortype = SelectorType.JsonPath, Selector = "$.name" },
            new Field { Name = "username", Selectortype = SelectorType.JsonPath, Selector = "$.meta.creator" },
            new Field { Name = "online", Selectortype = SelectorType.JsonPath, Selector = "$.meta.onlineCount", Type = FieldType.Int, },
            new Field { Name = "fanscount", Selectortype = SelectorType.JsonPath, Selector = "$.meta.subscriberCount", Type = FieldType.Int, },
            new Field { Name = "cate", Selector = "$.meta.gameName", Selectortype = SelectorType.JsonPath }
        },
        RepeatWhen = RepeatWhenEver.hour,
        RepeatAt = new TimeSpan(0, 35, 0),
    };

    crawler = new CrawlerDotNet.Core.Crawler();

    // Breakpoint seen on the previously processed scan page.
    string lastpoint = "";

    crawler.Processor.OnProcessScanPage = p =>
    {
        var point = p.GetJson("$.data.breakpoint");
        var nextType = point != lastpoint ? PageType.ScanUrl : PageType.ContextUrl;
        crawler.Schduler.AddUrl("https://chushou.tv/live/down-v2.htm?&breakpoint=" + point, nextType);
        lastpoint = point;
    };

    crawler.Processor.OnCustomExtract = p =>
    {
        var items = JObject.Parse(p.Html).SelectToken("$.data.items") as JArray;
        if (items == null)
        {
            // Nothing to extract when the payload has no "data.items" array.
            return;
        }

        foreach (var info in items)
        {
            var exres = new ExtractResults();
            foreach (var f in config.Fields)
            {
                // SelectToken returns null when the path is absent; fall back to
                // a default instead of dereferencing null and losing the page.
                var token = info.SelectToken(f.Selector);
                var value = token != null
                    ? token.ToString()
                    : (f.Type == FieldType.Int ? "0" : string.Empty);
                exres.Add(new Result(f.Name, value));
            }

            p.Results.Add(exres);
        }
    };

    crawler.Setup(config);
    crawler.Start();

    Console.WriteLine("end");
    Console.ReadKey();
}
// Sample for the "douyu" crawler.
//
// URLs involved:
//   https://www.douyu.com/directory/all                  (scan page)
//   https://www.douyu.com/directory/all?page=1&isAjax=1  (helper/listing pages)
//   http://open.douyucdn.cn/api/RoomApi/room/            (room API, content pages)
//
// The scan page is searched for "count:<n>," and one helper URL per page
// 1..n is queued. Each helper page is searched for href="..." title= and one
// room-API URL per hit is queued. The JsonPath fields in the config are
// applied to the room-API responses.
static void douyuSample()
{
    #region config
    var c = new Config
    {
        Name = "douyu",
        ScanUrls = "https://www.douyu.com/directory/all",
        ContentUrlRegexes = new Regex("room"),
        HelperUrlRegexes = new Regex("page"),
        Fields = new[]
        {
            new Field { Name = "title", Selectortype = SelectorType.JsonPath, Selector = "$.data.room_name" },
            new Field { Name = "username", Selectortype = SelectorType.JsonPath, Selector = "$.data.owner_name" },
            new Field { Name = "online", Selectortype = SelectorType.JsonPath, Selector = "$.data.online", Type = FieldType.Int, },
            new Field { Name = "fanscount", Selectortype = SelectorType.JsonPath, Selector = "$.data.fans_num", Type = FieldType.Int, },
            new Field { Name = "cate", Selector = "$.data.cate_name", Selectortype = SelectorType.JsonPath },
            new Field { Name = "startat", Selectortype = SelectorType.JsonPath, Selector = "$.data.start_time", Type = FieldType.String, },
        },
        RepeatWhen = RepeatWhenEver.hour,
        RepeatAt = new TimeSpan(0, 20, 0),
    };
    #endregion

    // Built once and captured by the callbacks instead of being re-created on every page.
    var roomLinkRegex = new Regex("href=\"([\\s\\S]*?)\" title=");
    var pageCountRegex = new Regex(@"count:(.+),");

    douyu = new CrawlerDotNet.Core.Crawler();
    douyu.Setup(c);

    douyu.Processor.OnProcessHelperPage = p =>
    {
        foreach (Match m in roomLinkRegex.Matches(p.Html))
        {
            douyu.Schduler.AddUrl("http://open.douyucdn.cn/api/RoomApi/room/" + m.Groups[1].Value, p.Request.Deth + 1);
        }

        p.SkipExtract();
    };

    douyu.Processor.OnProcessScanPage = p =>
    {
        var m = pageCountRegex.Match(p.Html);

        // The captured text may carry quotes (stripped here) or may not be a
        // number at all if the page layout changes, so parse defensively.
        int count;
        if (!m.Success || !int.TryParse(m.Groups[1].Value.Replace("\"", string.Empty), out count))
        {
            Console.WriteLine("douyu: page count not found on the scan page; no listing pages queued");
            p.SkipExtract();
            return;
        }

        for (int page = 1; page <= count; page++)
        {
            douyu.Schduler.AddUrl($"https://www.douyu.com/directory/all?page={page}&isAjax=1", PageType.HelperUrl, p.Request.Deth + 1);
        }

        p.SkipExtract();
    };

    douyu.Start();
}
// Sample entry point for the "zhanqi" crawler.
//
// The list API is paged through the URL (.../list/200/<page>.json). After
// each download, a non-empty "$.data.rooms" value queues the next page.
// Every element of "$.data.rooms" becomes one ExtractResults. All fields come
// from the list JSON except "fanscount", which is read with a regex from the
// room's own page (one extra download per room).
// Blocks on Console.ReadKey() after crawler.Start() returns.
static void Main(string[] args)
{
    var config = new Config
    {
        Name = "zhanqi",
        ScanUrls = "http://www.zhanqi.tv/api/static/v2.1/live/list/200/1.json",
        Fields = new[]
        {
            new Field { Name = "title", Selectortype = SelectorType.JsonPath, Selector = "$.title" },
            new Field { Name = "username", Selectortype = SelectorType.JsonPath, Selector = "$.nickname" },
            new Field { Name = "online", Selectortype = SelectorType.JsonPath, Selector = "$.online", Type = FieldType.Int, },
            new Field
            {
                // Presumably the XPath of the follower-count element on the room page:
                // //*[@id="js-room-anchor-info-area"]/div[2]/div[1]/div/span[1]
                Name = "fanscount",
                Selectortype = SelectorType.Regex,
                Selector = "js-room-follow-num\">([0-9]*)<",
                Type = FieldType.Int,
            },
            new Field { Name = "cate", Selector = "$.newGameName", Selectortype = SelectorType.JsonPath },
            new Field { Name = "childcate", Selector = "$.gameName", Selectortype = SelectorType.JsonPath }
        },
        RepeatWhen = RepeatWhenEver.hour,
        RepeatAt = new TimeSpan(0, 10, 0),
    };

    crawler = new CrawlerDotNet.Core.Crawler();

    // Index of the last list page that has been queued; reset before each crawl.
    var curPage = 1;
    crawler.BeforeCrawl = () => { curPage = 1; };

    crawler.Downloader.AfterDownloadPage = p =>
    {
        // Is there data? If so, queue the next JSON page.
        // A missing/empty value is treated like an empty list so that an
        // unexpected response cannot keep the pagination going.
        var rooms = p.GetJson("$.data.rooms");
        if (string.IsNullOrEmpty(rooms) || rooms == "[]")
        {
            return;
        }

        curPage++;
        crawler.Schduler.AddUrl($"http://www.zhanqi.tv/api/static/v2.1/live/list/200/{curPage}.json");
    };

    crawler.Processor.OnCustomExtract = p =>
    {
        var rooms = JObject.Parse(p.Html).SelectToken("$.data.rooms") as JArray;
        if (rooms == null)
        {
            // Nothing to extract when the payload has no "data.rooms" array.
            return;
        }

        foreach (var info in rooms)
        {
            var exres = new ExtractResults();
            foreach (var f in config.Fields)
            {
                if (f.Name == "fanscount")
                {
                    // Request the room page to read the follower count.
                    var urlToken = info.SelectToken("$.url");
                    var roomUrl = urlToken != null ? urlToken.ToString() : null;
                    if (string.IsNullOrEmpty(roomUrl))
                    {
                        // Without a room URL there is nothing to download.
                        exres.Add(new Result(f.Name, "0"));
                        continue;
                    }

                    var fanspage = crawler.Downloader.DownloaderOnly(new Request(crawler.Schduler)
                    {
                        Url = "https://www.zhanqi.tv" + roomUrl
                    });
                    var fansHtml = fanspage?.Html;
                    if (string.IsNullOrEmpty(fansHtml))
                    {
                        // Download produced no markup to search.
                        exres.Add(new Result(f.Name, "0"));
                        continue;
                    }

                    var r = BaseProcessor.DoRegex(fansHtml, f);
                    if (string.IsNullOrEmpty(r.Value))
                    {
                        r.Value = "0";
                    }

                    exres.Add(r);
                    continue;
                }

                // SelectToken returns null when the path is absent; fall back to
                // a default instead of dereferencing null and losing the page.
                var token = info.SelectToken(f.Selector);
                var value = token != null
                    ? token.ToString()
                    : (f.Type == FieldType.Int ? "0" : string.Empty);
                exres.Add(new Result(f.Name, value));
            }

            p.Results.Add(exres);
        }
    };

    crawler.Setup(config);
    crawler.Start();

    Console.WriteLine("end");
    Console.ReadKey();
}