Exemplo n.º 1
0
        /// <summary>
        /// Assembles a <see cref="VnexpressSpider"/> through a <see cref="SpiderBuilder"/>
        /// and starts it.
        /// </summary>
        /// <returns>The task returned by the spider's <c>RunAsync</c> call.</returns>
        public static Task Run()
        {
            var spiderBuilder = new SpiderBuilder();
            spiderBuilder.AddSerilog();
            spiderBuilder.ConfigureAppConfiguration();
            spiderBuilder.UseStandalone();

            // Project settings handed to the spider through the service collection.
            // ItemUrlsSelector is intentionally left as an empty string here.
            var project = new ProjectDefinition
            {
                ProjectName = "Vnexpress Spider",
                Site = "Vnexpress/Kinh Doanh",
                ItemUrlsSelector = "",
                Urls = "https://vnexpress.net/kinh-doanh",
                FileFormat = "*.html",
                FileStorage = @"P:\Neil.Test\Spider Storage\Vnexpress",
                PageLimit = 4,
            };
            spiderBuilder.Services.AddSingleton<ProjectDefinition>(project);

            spiderBuilder.AddSpider<VnexpressSpider>();

            // NOTE: a manual AddSingleton<IDynamicMessageQueue, InMemoryMessageQueue>()
            // registration used to be sketched here (disabled); UseDynamicMessageQueue()
            // is the call that is actually in effect.
            spiderBuilder.UseDynamicMessageQueue();

            // Build the factory, resolve the spider and hand its task back to the caller.
            return spiderBuilder.Build().Create<VnexpressSpider>().RunAsync();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Assembles an <see cref="HttpClientSpider"/> through a <see cref="SpiderBuilder"/>,
        /// registering a <see cref="FakeProxyValidator"/> and an item mapping, and starts it.
        /// </summary>
        /// <returns>The task returned by the spider's <c>RunAsync</c> call.</returns>
        public static Task Run()
        {
            var spiderBuilder = new SpiderBuilder();
            spiderBuilder.AddSerilog();

            // FakeProxyValidator is registered as the IProxyValidator;
            // DefaultProxyValidator was the disabled alternative.
            spiderBuilder.Services.AddSingleton<IProxyValidator, FakeProxyValidator>();

            // The proxy supply URL is passed in as if it were a command-line argument.
            var commandLine = new[] { "/ProxySupplyUrl=http://localhost:52445/api/proxies" };
            spiderBuilder.ConfigureAppConfiguration(null, args: commandLine, true);
            spiderBuilder.UseStandalone();

            // Project settings handed to the spider through the service collection.
            var project = new ProjectDefinition
            {
                ProjectName = "Vnexpress Spider",
                Site = "Vnexpress/Kinh Doanh",
                // Two selectors separated by ';'.
                ItemUrlsSelector = "//article/h1[@class='title_news']/a[1];//article[@class='list_news']/h4[@class='title_news']/a[1]",
                Urls = "https://vnexpress.net/kinh-doanh",
                FileStorage = @"P:\Neil.Test\Spider Storage\Vnexpress",
                FileFormat = "*.json",
                PageLimit = 4,
                Deepth = 2,
                NextPageSelector = "//p[@id='pagination']/a[@class='next']",
                NumberOfConcurrentRequests = 5,
                Mapping = new ItemMapping
                {
                    // The *CssSelector properties are given XPath-style expressions here.
                    ItemCssSelector = "//section[@id='left_calculator']",
                    Mapping = new[]
                    {
                        new FieldMapping
                        {
                            Field = "Title",
                            CssSelector = "//h1[@class='title_news_detail mb10']",
                        },
                        new FieldMapping
                        {
                            Field = "Description",
                            CssSelector = "//p[@class='description']",
                        },
                    },
                },
            };
            spiderBuilder.Services.AddSingleton<ProjectDefinition>(project);

            spiderBuilder.AddSpider<HttpClientSpider>();

            // NOTE: a manual AddSingleton<IDynamicMessageQueue, InMemoryMessageQueue>()
            // registration used to be sketched here (disabled); UseDynamicMessageQueue()
            // is the call that is actually in effect.
            spiderBuilder.UseDynamicMessageQueue();

            // Build the factory, resolve the spider and hand its task back to the caller.
            return spiderBuilder.Build().Create<HttpClientSpider>().RunAsync();
        }