/// <summary>
/// Starts the image downloader, builds a standalone spider and launches the gallery crawl.
/// </summary>
public static void Run()
{
    // Start URL of the crawl. The site-wide gallery index
    // ("https://www.nvshens.com/gallery/") is an alternative entry point that is currently disabled.
    const string startUrl = "https://www.nvshens.com/gallery/luoli/";

    ImageDownloader.GetInstance().Start();

    var spiderBuilder = new SpiderBuilder();
    spiderBuilder.AddSerilog();
    spiderBuilder.ConfigureAppConfiguration();
    spiderBuilder.UseStandalone();
    spiderBuilder.AddSpider<EntitySpider>();

    var crawler = spiderBuilder.Build().Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "宅男女神图片采集";
    // Crawl speed: number of requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower. Must not be 0.
    crawler.Speed = 2;
    // Crawl depth.
    crawler.Depth = 5;
    // Use the plain downloader: no cookie handling, a clean HttpClient.
    crawler.DownloaderSettings.Type = DownloaderType.HttpClient;

    // Parsers are registered in this order; NvshensTagIndexDataParser is intentionally left out
    // (it was commented out in the original configuration).
    crawler.AddDataFlow(new NvshensFirstPageTagDataParser());
    crawler.AddDataFlow(new NvshensPageTagDataParser());
    crawler.AddDataFlow(new NvshensFirstPageDetailDataParser());
    crawler.AddDataFlow(new NvshensPageDetailDataParser());

    crawler.AddRequests(startUrl);

    // Launch the spider; the returned task is not awaited here.
    crawler.RunAsync();
}
/// <summary>
/// Starts the image downloader, builds a spider host with local (in-process) services
/// and launches the gallery crawl.
/// </summary>
public static void Run()
{
    // Start URL of the crawl. The site-wide gallery index
    // ("https://www.nvshens.com/gallery/") is an alternative entry point that is currently disabled.
    const string startUrl = "https://www.nvshens.com/gallery/luoli/";

    ImageDownloader.GetInstance().Start();

    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging => logging.AddSerilog())
        .ConfigureAppConfiguration(config => config.AddJsonFile("appsettings.json"))
        .ConfigureServices(services =>
        {
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics => statistics.UseMemory());
        })
        .Register<EntitySpider>();

    var crawler = hostBuilder.Build().Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "宅男女神图片采集";
    // Crawl speed: number of requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower. Must not be 0.
    crawler.Speed = 2;
    // Crawl depth.
    crawler.Depth = 5;

    // Parsers are registered in this order; NvshensTagIndexDataParser is intentionally left out
    // (it was commented out in the original configuration).
    crawler.AddDataFlow(new NvshensFirstPageTagDataParser());
    crawler.AddDataFlow(new NvshensPageTagDataParser());
    crawler.AddDataFlow(new NvshensFirstPageDetailDataParser());
    crawler.AddDataFlow(new NvshensPageDetailDataParser());

    crawler.AddRequests(startUrl);

    // Launch the spider; the returned task is not awaited here.
    crawler.RunAsync();
}
/// <summary>
/// Parses a gallery detail page. When the page's <c>Next(...)</c> script call reports that this is
/// page "1" of several, the remaining pages are scheduled as extra requests. Every image found under
/// <c>div#ArticleId0</c> is then handed to the <see cref="ImageDownloader"/>.
/// </summary>
/// <param name="context">Data-flow context holding the downloaded response and its selectable document.</param>
/// <returns>A completed task carrying <see cref="DataFlowResult.Success"/>.</returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    var pageRequest = context.Response.Request;
    Console.WriteLine(pageRequest.Url);

    foreach (var pagerNode in context.Selectable.Regex("Next(.+)").Nodes())
    {
        // The page script's Next(...) call carries the current page id and the total page count.
        var pagerText = pagerNode.GetValue();
        if (pagerText == null)
        {
            continue;
        }

        var pagerArgs = pagerText.Replace("Next(", "").Replace("\\", "").Replace("\"", "").Split(',');

        // A match without at least two comma-separated arguments cannot describe pagination;
        // skip it instead of indexing past the end of the array.
        if (pagerArgs.Length < 2)
        {
            continue;
        }

        // A failed parse leaves pages at 0, which is rejected by the "more than one page" check.
        int.TryParse(pagerArgs[1], out int pages);
        if (pages <= 1 || pagerArgs[0] != "1")
        {
            continue;
        }

        // The page title does not depend on the page index, so evaluate the XPath once.
        var pageTitle = context.Selectable.XPath(".//title").GetValue();

        var requests = new List<Request>();
        for (int i = 2; i <= pages; i++)
        {
            var request = new Request()
            {
                OwnerId = pageRequest.OwnerId,
                Url = pageRequest.Url.Replace(".htm", $"_{i}.htm")
            };
            request.AddProperty("tag", pageTitle);
            requests.Add(request);
        }

        Console.WriteLine($"{pageRequest.Url}\t{pages}\t{requests.Count}");
        count += pages;
        context.AddExtraRequests(requests.ToArray());
    }

    // The tag (second breadcrumb link) and the inherited referer are the same for every image on the page.
    var pageTag = context.Selectable.XPath(".//div[@class='position gray']//div[1]//a[2]").GetValue();
    var inheritedReferer = pageRequest.GetProperty("referer");

    foreach (var imgNode in context.Selectable.XPath(".//div[@id='ArticleId0']//p//a//img").Nodes())
    {
        var url = imgNode.XPath("@src").GetValue();
        if (string.IsNullOrWhiteSpace(url))
        {
            // Nothing to download for an <img> without a src.
            continue;
        }

        // Re-parse the element with empty alt="" attributes stripped before reading @alt.
        // If the node exposes no underlying element, fall back to a null subject
        // (the same value an image without an alt attribute yields) instead of dereferencing null.
        string name = null;
        var element = (imgNode as Selectable)?.Elements?.FirstOrDefault();
        if (element != null)
        {
            var altSource = new Selectable(element.OuterHtml.Replace("alt=\"\"", ""));
            name = altSource.XPath("//img/@alt").GetValue();
        }

        var imageRequest = new Request()
        {
            Url = url,
            OwnerId = pageRequest.OwnerId
        };
        imageRequest.AddProperty("tag", pageTag);
        imageRequest.AddProperty("referer", inheritedReferer ?? url);
        imageRequest.AddProperty("subject", name);
        ImageDownloader.GetInstance().AddRequest(imageRequest);
    }

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Extracts the image addresses from an image-browsing page: records the page URL and title on the
/// context, then hands every <c>#hgallery</c> image to the <see cref="ImageDownloader"/>.
/// </summary>
/// <param name="context">Data-flow context holding the downloaded response and its selectable document.</param>
public static void GetDetailPictureUrl(DataFlowContext context)
{
    var pageRequest = context.Response.Request;

    context.AddData("URL", pageRequest.Url);

    // The title is used both as page data and as every image's subject; evaluate the XPath once.
    var title = context.Selectable.XPath(".//title").GetValue();
    context.AddData("Title", title);

    var imageUrls = context.Selectable.XPath("//*[@id=\"hgallery\"]/img/@src").GetValues();
    if (imageUrls == null)
    {
        return;
    }

    // These properties are inherited from the page request and are identical for every image.
    var tag = pageRequest.GetProperty("tag");
    var referer = pageRequest.GetProperty("referer");

    foreach (var imageUrl in imageUrls)
    {
        if (string.IsNullOrWhiteSpace(imageUrl))
        {
            // Nothing to download for an <img> without a usable src.
            continue;
        }

        // Queue the image URL for download.
        var request = new Request
        {
            Url = imageUrl,
            OwnerId = pageRequest.OwnerId
        };
        request.AddProperty("tag", tag);
        request.AddProperty("referer", referer);
        request.AddProperty("subject", title);
        ImageDownloader.GetInstance().AddRequest(request);
    }
}