/// <summary>
        /// Starts the image downloader, builds a standalone <c>Spider</c> through
        /// <c>SpiderBuilder</c>, configures it for the gallery crawl and launches it.
        /// </summary>
        /// <remarks>
        /// <c>RunAsync</c> is invoked without waiting on its result, so this method returns
        /// as soon as the crawl has been started.
        /// </remarks>
        public static void Run()
        {
            // Start URL of the crawl.
            // Disabled alternative: "https://www.nvshens.com/gallery/"
            const string startUrl = "https://www.nvshens.com/gallery/luoli/";

            // The image downloader is started before the spider is configured.
            ImageDownloader.GetInstance().Start();

            var spiderBuilder = new SpiderBuilder();
            spiderBuilder.AddSerilog();
            spiderBuilder.ConfigureAppConfiguration();
            spiderBuilder.UseStandalone();
            spiderBuilder.AddSpider<EntitySpider>();

            var crawler = spiderBuilder.Build().Create<Spider>();

            // Task identifier.
            crawler.Id = Guid.NewGuid().ToString("N");
            // Task name.
            crawler.Name = "宅男女神图片采集";
            // Crawl speed: how many requests are downloaded per second. Above 1, larger is
            // faster; below 1, smaller is slower; must not be 0.
            crawler.Speed = 2;
            // Crawl depth.
            crawler.Depth = 5;
            // Use the plain downloader: cookie-agnostic, clean HttpClient.
            crawler.DownloaderSettings.Type = DownloaderType.HttpClient;

            // Parsers are registered in this order.
            // Disabled: crawler.AddDataFlow(new NvshensTagIndexDataParser());
            crawler.AddDataFlow(new NvshensFirstPageTagDataParser());
            crawler.AddDataFlow(new NvshensPageTagDataParser());
            crawler.AddDataFlow(new NvshensFirstPageDetailDataParser());
            crawler.AddDataFlow(new NvshensPageDetailDataParser());

            crawler.AddRequests(startUrl);

            // Launch the crawl.
            crawler.RunAsync();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Starts the image downloader, builds a <c>Spider</c> through
        /// <c>SpiderHostBuilder</c> (Serilog logging, <c>appsettings.json</c> configuration,
        /// local event bus, local download center, downloader agent and in-memory statistics),
        /// configures it for the gallery crawl and launches it.
        /// </summary>
        /// <remarks>
        /// <c>RunAsync</c> is invoked without waiting on its result, so this method returns
        /// as soon as the crawl has been started.
        /// </remarks>
        public static void Run()
        {
            // Start URL of the crawl.
            // Disabled alternative: "https://www.nvshens.com/gallery/"
            const string startUrl = "https://www.nvshens.com/gallery/luoli/";

            // The image downloader is started before the spider is configured.
            ImageDownloader.GetInstance().Start();

            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(configuration => configuration.AddJsonFile("appsettings.json"))
                .ConfigureServices(serviceCollection =>
                {
                    serviceCollection.AddLocalEventBus();
                    serviceCollection.AddLocalDownloadCenter();
                    serviceCollection.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    serviceCollection.AddStatisticsCenter(statistics => statistics.UseMemory());
                })
                .Register<EntitySpider>();

            var crawler = hostBuilder.Build().Create<Spider>();

            // Task identifier.
            crawler.Id = Guid.NewGuid().ToString("N");
            // Task name.
            crawler.Name = "宅男女神图片采集";
            // Crawl speed: how many requests are downloaded per second. Above 1, larger is
            // faster; below 1, smaller is slower; must not be 0.
            crawler.Speed = 2;
            // Crawl depth.
            crawler.Depth = 5;

            // Parsers are registered in this order.
            // Disabled: crawler.AddDataFlow(new NvshensTagIndexDataParser());
            crawler.AddDataFlow(new NvshensFirstPageTagDataParser());
            crawler.AddDataFlow(new NvshensPageTagDataParser());
            crawler.AddDataFlow(new NvshensFirstPageDetailDataParser());
            crawler.AddDataFlow(new NvshensPageDetailDataParser());

            crawler.AddRequests(startUrl);

            // Launch the crawl.
            crawler.RunAsync();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses a downloaded page in two passes: when the page is the first of a multi-page
        /// set, the remaining pages are queued as extra requests; then every article image
        /// found on the page is handed to <c>ImageDownloader</c>.
        /// </summary>
        /// <param name="context">Data-flow context holding the response and its selectable content.</param>
        /// <returns>A completed task whose result is <c>DataFlowResult.Success</c>.</returns>
        protected override Task<DataFlowResult> Parse(DataFlowContext context)
        {
            var currentRequest = context.Response.Request;
            Console.WriteLine(currentRequest.Url);

            foreach (var node in context.Selectable.Regex("Next(.+)").Nodes())
            {
                // The matched page function carries the current page id and the total page count.
                var arguments = node.GetValue()
                                    .Replace("Next(", "")
                                    .Replace("\\", "")
                                    .Replace("\"", "")
                                    .Split(',');

                // A match without a comma has no page-count argument; indexing it would throw.
                if (arguments.Length < 2 || !int.TryParse(arguments[1], out int pages))
                {
                    continue;
                }

                // Follow-up pages are only queued from page 1 of a multi-page set.
                if (pages <= 1 || arguments[0] != "1")
                {
                    continue;
                }

                // The page title is the same for every follow-up request, so read it once.
                var tag      = context.Selectable.XPath(".//title").GetValue();
                var requests = new List<Request>();
                for (int i = 2; i <= pages; i++)
                {
                    var request = new Request
                    {
                        OwnerId = currentRequest.OwnerId,
                        Url     = currentRequest.Url.Replace(".htm", $"_{i}.htm")
                    };
                    request.AddProperty("tag", tag);
                    requests.Add(request);
                }

                Console.WriteLine($"{currentRequest.Url}\t{pages}\t{requests.Count}");
                count += pages;
                context.AddExtraRequests(requests.ToArray());
            }

            // These values do not depend on the individual image, so they are read once.
            var imageTag = context.Selectable.XPath(".//div[@class='position gray']//div[1]//a[2]").GetValue();
            var referer  = currentRequest.GetProperty("referer");

            foreach (var imgNode in context.Selectable.XPath(".//div[@id='ArticleId0']//p//a//img").Nodes())
            {
                var url = imgNode.XPath("@src").GetValue();
                if (string.IsNullOrWhiteSpace(url))
                {
                    // Nothing to download for an image without a source address.
                    continue;
                }

                var element = (imgNode as Selectable)?.Elements?.FirstOrDefault();
                if (element == null)
                {
                    // The node is not a Selectable or has no backing element to read the alt text from.
                    continue;
                }

                // Empty alt="" attributes are removed from the markup before the alt text is read.
                var altSource = new Selectable(element.OuterHtml.Replace("alt=\"\"", ""));
                var subject   = altSource.XPath("//img/@alt").GetValue();

                var imageRequest = new Request
                {
                    Url     = url,
                    OwnerId = currentRequest.OwnerId
                };
                imageRequest.AddProperty("tag", imageTag);
                // Falls back to the image URL itself when the page request carries no referer.
                imageRequest.AddProperty("referer", referer ?? url);
                imageRequest.AddProperty("subject", subject);
                ImageDownloader.GetInstance().AddRequest(imageRequest);
            }

            return Task.FromResult(DataFlowResult.Success);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Extracts the image addresses from a picture-browsing page: records the page URL and
        /// title on the context, then queues every image found under the element with id
        /// <c>hgallery</c> with <c>ImageDownloader</c>.
        /// </summary>
        /// <param name="context">Data-flow context holding the response and its selectable content.</param>
        public static void GetDetailPictureUrl(DataFlowContext context)
        {
            var pageRequest = context.Response.Request;
            context.AddData("URL", pageRequest.Url);

            // The title is used both as page data and as the subject of every image request,
            // so it is evaluated once instead of once per image.
            var title = context.Selectable.XPath(".//title").GetValue();
            context.AddData("Title", title);

            // Properties inherited from the page request are identical for every image.
            var tag     = pageRequest.GetProperty("tag");
            var referer = pageRequest.GetProperty("referer");

            var images = context.Selectable.XPath("//*[@id=\"hgallery\"]/img/@src").GetValues();

            foreach (var image in images)
            {
                if (string.IsNullOrWhiteSpace(image))
                {
                    // Nothing to download for an image without a source address.
                    continue;
                }

                // Build the download request for this image URL.
                var request = new Request
                {
                    Url     = image,
                    OwnerId = pageRequest.OwnerId
                };
                request.AddProperty("tag", tag);
                request.AddProperty("referer", referer);
                request.AddProperty("subject", title);
                ImageDownloader.GetInstance().AddRequest(request);
            }
        }