Example #1
0
        /// <summary>
        /// Builds a runtime <see cref="SpiderContext"/> from the members of this object.
        /// </summary>
        /// <remarks>
        /// Some members are copied across unchanged; the others are passed through the
        /// matching <c>Get*</c> helper first. Members are initialised in a fixed order so
        /// the helpers always run in the same sequence.
        /// </remarks>
        /// <returns>The populated <see cref="SpiderContext"/>.</returns>
        public SpiderContext ToRuntimeContext()
        {
            var runtime = new SpiderContext
            {
                CachedSize            = CachedSize,
                PageHandlers          = GetCustomziePage(PageHandlers),
                TargetUrlsHandler     = GetCustomizeTargetUrls(TargetUrlsHandler),
                Deep                  = Deep,
                Downloader            = GetDownloader(Downloader),
                EmptySleepTime        = EmptySleepTime,
                Entities              = Entities,
                SkipWhenResultIsEmpty = SkipWhenResultIsEmpty,
                Pipelines             = GetPipepines(Pipelines),
                PrepareStartUrls      = GetPrepareStartUrls(PrepareStartUrls),
                Redialer              = GetRedialer(Redialer),
                Scheduler             = GetScheduler(Scheduler),
                Site                  = Site,
                StartUrls             = StartUrls,
                SpiderName            = SpiderName,
                ThreadNum             = ThreadNum,
                EnviromentValues      = EnviromentValues,
                Validations           = GetValidations(Validations),
                UserId                = UserId,
                TaskGroup             = TaskGroup,
                TargetUrlExtractInfos = GetTargetUrlExtractInfos(TargetUrlExtractInfos)
            };

            return runtime;
        }
Example #2
0
        /// <summary>
        /// Creates the context for the "JD sku/store test" spider.
        /// </summary>
        /// <returns>
        /// A context with a task group, a timestamped spider name, one target URL extractor,
        /// a MySQL pipeline, a Redis scheduler, one start URL and the <c>Product</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            spiderContext.SetTaskGroup("JD sku/store test");

            // The name carries the creation time so that every run gets its own name.
            string runName = "JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type       = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            spiderContext.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): the connection credentials are embedded in source.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            var redisScheduler = new RedisScheduler
            {
                Host     = "ooodata.com",
                Password = "******",
                Port     = 6379
            };
            spiderContext.SetScheduler(redisScheduler);

            // Extra values attached to the start URL.
            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            spiderContext.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            spiderContext.AddEntityType(typeof(Product));
            return spiderContext;
        }
Example #3
0
        /// <summary>
        /// Decides whether <paramref name="location"/> may be followed, based on the
        /// base location of <paramref name="context"/>.
        /// </summary>
        /// <remarks>
        /// Images are accepted unconditionally when <c>downloadImagesFromExternalSite</c> is set,
        /// and every other URL type is accepted unconditionally when
        /// <c>downloadResourceFromExternalSite</c> is set. Otherwise the location must start with
        /// the base location cut off after its last '/'.
        /// </remarks>
        /// <param name="location">Candidate URI.</param>
        /// <param name="context">Context supplying <c>BaseLocation</c>.</param>
        /// <param name="parentSpider">
        /// Resource that is adding the URI; it is cast to <c>WebSpiderResource</c> to read
        /// <c>addingUrlType</c>.
        /// </param>
        /// <returns><c>true</c> when the location is accepted; otherwise <c>false</c>.</returns>
        public bool Accept(Uri location, SpiderContext context, ISpiderResource parentSpider)
        {
            UrlType urlType = ((WebSpiderResource)parentSpider).addingUrlType;

            // Pick the flag that applies to this URL type.
            bool externalAllowed = urlType == UrlType.Img
                ? this.downloadImagesFromExternalSite
                : this.downloadResourceFromExternalSite;

            if (externalAllowed)
            {
                return true;
            }

            string baseFolder = context.BaseLocation;
            int    lastSlash  = baseFolder.LastIndexOf('/');

            if (lastSlash >= 0)
            {
                // Keep everything up to and including the last slash.
                baseFolder = baseFolder.Substring(0, lastSlash + 1);
            }

            // URLs are compared ordinally: a culture-sensitive comparison can ignore or
            // fold characters depending on the current culture.
            return location.OriginalString.StartsWith(baseFolder, StringComparison.Ordinal);
        }
Example #4
0
        /// <summary>
        /// Creates the context for the "ddeng.com" spider.
        /// </summary>
        /// <returns>
        /// A context with a timestamped spider name, a site definition (cookie, headers,
        /// user agent, accept header), a MySQL pipeline, one start URL and the <c>Corp</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            string runName = "ddeng.com " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            var site = new Site
            {
                Cookie  = "sid=dea284fc36c24e8cbcd447343d7b8a4e; sn=DD962248; ctid=000000; ctnm=%E5%8F%A4%E9%95%87%E7%81%AF%E9%A5%B0%E6%89%B9%E5%8F%91; ctpv=%E5%B9%BF%E4%B8%9C; JSESSIONID=acbBqFfOD4I63d9PziDvv; DDENG=c4fc08ae2e3ba3efeddbc667c2f45e615a85e80009169501dc244a03e87908aa61146548b97ed9c7dc07af23bfd80bff5008f8c8867a9165d4bd2732aca0db7dedae2e042d3968fcad1150f36be242e8a32a3f59db2a0b39216a59f1628508c5799644532a9d99925f9841b3c13a1f97; userId=10003379; previousUser=%E5%A4%95%E7%8E%89; Hm_lvt_9e33f153f28be198970d205d90a24f28=1466146335; Hm_lpvt_9e33f153f28be198970d205d90a24f28=1466146392; Hm_lvt_54b4cb498afd05463ab4611b38a6f289=1466146335; Hm_lpvt_54b4cb498afd05463ab4611b38a6f289=1466146392; CNZZDATA1256982382=395301521-1466143554-%7C1466143554",
                Headers = new Dictionary<string, string>
                {
                    { "Cache-Control", "max-age=0" },
                    { "Upgrade-Insecure-Requests", "1" }
                },
                UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
                Accept    = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
            };
            spiderContext.SetSite(site);

            // Data Source and Password are blank in this connection string.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            spiderContext.AddStartUrl("http://www.ddeng.com/product/967659");
            spiderContext.AddEntityType(typeof(Corp));

            return spiderContext;
        }
Example #5
0
        /// <summary>
        /// Decides whether <paramref name="location"/> may be followed, based on the
        /// base location of <paramref name="context"/>.
        /// </summary>
        /// <remarks>
        /// When the flag matching the URL type (<c>downloadImagesFromExternalSite</c> for images,
        /// <c>downloadResourceFromExternalSite</c> for everything else) is set, the location is
        /// accepted without further checks. Otherwise it must start with the base location
        /// truncated after its last '/'.
        /// </remarks>
        /// <param name="location">Candidate URI.</param>
        /// <param name="context">Context supplying <c>BaseLocation</c>.</param>
        /// <param name="parentSpider">
        /// Resource that is adding the URI; cast to <c>WebSpiderResource</c> to read <c>addingUrlType</c>.
        /// </param>
        /// <returns><c>true</c> when the location is accepted; otherwise <c>false</c>.</returns>
        public bool Accept(Uri location, SpiderContext context, ISpiderResource parentSpider)
        {
            UrlType urlType = ((WebSpiderResource)parentSpider).addingUrlType;

            bool isImage = urlType == UrlType.Img;
            bool externalAllowed = isImage
                ? this.downloadImagesFromExternalSite
                : this.downloadResourceFromExternalSite;

            if (!externalAllowed)
            {
                string tempbase = context.BaseLocation;
                int slashIndex = tempbase.LastIndexOf('/');

                if (slashIndex >= 0)
                {
                    // Keep the trailing slash so only locations under that folder match.
                    tempbase = tempbase.Substring(0, slashIndex + 1);
                }

                // Ordinal comparison: URL text must not be matched with culture rules.
                if (!location.OriginalString.StartsWith(tempbase, StringComparison.Ordinal))
                {
                    return false;
                }
            }

            return true;
        }
Example #6
0
        /// <summary>
        /// Program entry point: reads "mysqlsample.json", runs it through <c>Macros.Replace</c>,
        /// deserialises it into a <see cref="SpiderContext"/> and, when the context validates,
        /// runs a <c>ScriptSpider</c> with it. Validation messages are printed otherwise.
        /// </summary>
        /// <param name="args">Command-line arguments, forwarded to the spider's <c>Run</c>.</param>
        public static void Main(string[] args)
        {
            Core.Spider.PrintInfo();

            // The command-line driven start-up (ParseCommand / StartSpider) is currently disabled.
            string rawJson      = File.ReadAllText("mysqlsample.json");
            string expandedJson = Macros.Replace(rawJson);

            SpiderContext spiderContext = JsonConvert.DeserializeObject<SpiderContext>(expandedJson);

            List<string> errorMessages;
            bool         isValid = SpiderContextValidation.Validate(spiderContext, out errorMessages);

            if (!isValid)
            {
                foreach (string message in errorMessages)
                {
                    Console.WriteLine(message);
                }
            }
            else
            {
                new ScriptSpider(spiderContext).Run(args);
            }

            // Wait for console input before returning.
            Console.Read();
        }
        /// <summary>
        /// Creates the context for the "JD sku/store test" spider that downloads through a
        /// <c>WebDriverDownloader</c> configured for Chrome.
        /// </summary>
        /// <returns>
        /// A context with a timestamped spider name, one target URL extractor, a MySQL pipeline,
        /// one start URL, the <c>Product</c> entity type and the downloader.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            string runName = "JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type       = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            spiderContext.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): the connection credentials are embedded in source.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='mysql';Data Source=192.168.199.211;User ID=root;Password=1qazZAQ!;Port=3306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            spiderContext.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            spiderContext.AddEntityType(typeof(Product));

            var chromeDownloader = new WebDriverDownloader
            {
                Browser = Extension.Downloader.WebDriver.Browser.Chrome
            };
            spiderContext.SetDownloader(chromeDownloader);

            return spiderContext;
        }
Example #8
0
        /// <summary>
        /// Creates the context for the "cnblogs homepage" spider that starts at the
        /// news.cnblogs.com page list.
        /// </summary>
        /// <returns>
        /// A context with a site definition (min and max sleep time of 1), task group,
        /// timestamped name, one start URL, a MySQL pipeline, one target URL extractor,
        /// a thread count of 5 and the <c>Cnblogs</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            var site = new Site
            {
                MaxSleepTime = 1,
                MinSleepTime = 1
            };
            spiderContext.Site = site;

            spiderContext.SetTaskGroup("cnblogs homepage");

            string runName = "cnblogs homepage " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            spiderContext.AddStartUrl("http://news.cnblogs.com/n/page/1/");

            // NOTE(review): the connection credentials are embedded in source.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='taobao';Data Source= 86research.imwork.net;User ID=root;Password=1qazZAQ!;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            var pagerExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Expression = "//*[@id='pager']",
                    Type       = ExtractType.XPath
                }
            };
            spiderContext.AddTargetUrlExtractor(pagerExtractor);

            spiderContext.ThreadNum = 5;
            spiderContext.AddEntityType(typeof(Cnblogs));

            return spiderContext;
        }
Example #9
0
        /// <summary>
        /// Creates the context for the two-thread "JD_sku_store_test" spider.
        /// </summary>
        /// <returns>
        /// A context with a thread count of 2, a timestamped spider name, one target URL extractor,
        /// a MySQL pipeline, one start URL and the <c>Product</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            spiderContext.SetThreadNum(2);

            // This variant uses underscores only in both the name and the timestamp.
            string runName = "JD_sku_store_test_" + DateTime.Now.ToString("yyyy_MM_dd_HHmmss");
            spiderContext.SetSpiderName(runName);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type       = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            spiderContext.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): the connection credentials are embedded in source.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=mysqlserver;User ID=root;Password=1qazZAQ!;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            spiderContext.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            spiderContext.AddEntityType(typeof(Product));

            return spiderContext;
        }
Example #10
0
        /// <summary>
        /// Rejects locations whose last path segment has an extension listed in
        /// <c>extensionsToExclude</c>.
        /// </summary>
        /// <param name="location">Candidate URI; its last segment supplies the extension.</param>
        /// <param name="context">Not used by this rule.</param>
        /// <param name="parentSpider">Not used by this rule.</param>
        /// <returns><c>true</c> when the extension is not in the exclusion list.</returns>
        public bool Accept(Uri location, SpiderContext context, ISpiderResource parentSpider)
        {
            // Read Segments once instead of twice.
            string[] segments    = location.Segments;
            string   lastSegment = segments[segments.Length - 1];

            // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
            // under Turkish cultures, so ".GIF" would never match ".gif".
            string ext = Path.GetExtension(lastSegment).ToLowerInvariant();

            return Array.IndexOf<string>(this.extensionsToExclude, ext) < 0;
        }
 /// <summary>
 /// Reports whether a <c>Catalog</c> row exists whose <c>CatalogLink</c> equals
 /// <paramref name="RefCatalogLink"/> and whose <c>RankTime</c> equals today's date
 /// formatted as "yyyy-MM-dd".
 /// </summary>
 /// <param name="RefCatalogLink">Catalog link to look up.</param>
 /// <returns><c>true</c> when at least one matching row exists.</returns>
 public bool TodayIsGetProduct(string RefCatalogLink)
 {
     using (DbContext Context = new SpiderContext())
     {
         string rankTime = DateTime.Now.ToString("yyyy-MM-dd");

         // Any() only asks whether a row exists instead of counting every match.
         return Context.Set<Catalog>().Any(o => o.CatalogLink == RefCatalogLink && o.RankTime == rankTime);
     }
 }
Example #12
0
 /// <summary>
 /// Adds <paramref name="entity"/> to its set in a new <see cref="SpiderContext"/> and
 /// saves through <c>Save</c>.
 /// </summary>
 /// <param name="entity">Entity to add.</param>
 public void Add(T entity)
 {
     using (DbContext db = new SpiderContext())
     {
         var entities = db.Set<T>();
         entities.Add(entity);
         Save(db);
     }
 }
        /// <summary>
        /// Accepts a location unless the extension of its last path segment appears in
        /// <c>extensionsToExclude</c>.
        /// </summary>
        /// <param name="location">Candidate URI; its last segment supplies the extension.</param>
        /// <param name="context">Not used by this rule.</param>
        /// <param name="parentSpider">Not used by this rule.</param>
        /// <returns><c>true</c> when the extension is not excluded; otherwise <c>false</c>.</returns>
        public bool Accept(Uri location, SpiderContext context, ISpiderResource parentSpider)
        {
            string[] segments = location.Segments;

            // ToLowerInvariant keeps the result independent of the current culture
            // (ToLower() turns 'I' into 'ı' under Turkish cultures).
            string ext = Path.GetExtension(segments[segments.Length - 1]).ToLowerInvariant();

            int index = Array.IndexOf<string>(this.extensionsToExclude, ext);

            return index < 0;
        }
Example #14
0
        /// <summary>
        /// Downloads the file behind every entry of <c>downloadModels</c> and, for each
        /// successful download, stores the resulting path on the matching article record.
        /// </summary>
        /// <remarks>
        /// Entries are processed one at a time. After each entry the method waits
        /// <c>SleepTime</c> milliseconds without blocking the thread.
        /// </remarks>
        /// <returns>A task that completes when every entry has been processed.</returns>
        static async Task DownloadImages()
        {
            // Downloads one file to BasePath/<alias>/<guid>.jpg.
            // Returns "<alias>/<guid>.jpg" on success and "" when anything fails.
            Func <DownloadModel, Task <string> > asyncTask = async model =>
            {
                using (WebClient webClient = new WebClient())
                {
                    Console.WriteLine($"下载文件{model.Url}");
                    string fileName = Guid.NewGuid().ToString().Replace("-", "") + ".jpg";
                    string path     = Path.Combine(model.alias, fileName);
                    try
                    {
                        string savePath = Path.Combine(BasePath, model.alias);

                        // CreateDirectory does nothing when the directory already exists,
                        // so no separate existence check is needed.
                        Directory.CreateDirectory(savePath);

                        await webClient.DownloadFileTaskAsync(model.Url, Path.Combine(savePath, fileName));

                        Console.WriteLine($"下载完成,输出路径{path}");
                        return(path);
                    }
                    catch (Exception e)
                    {
                        // Best effort: report the failure and move on to the next entry.
                        Console.WriteLine($"下载出错:{e.Message}");
                        return("");
                    }
                }
            };

            // Marks the article with the model's id as downloaded and stores its local path.
            Func <DownloadModel, Task> saveRecordAsync = async model =>
            {
                Console.WriteLine("准备保存地址到数据库");
                using (SpiderContext db = new SpiderContext())
                {
                    var article = await db.WeChatArticles.FirstOrDefaultAsync(c => c.Id == model.Id);

                    if (null != article)
                    {
                        article.Download  = true;
                        article.LocalPath = model.Path;
                        await db.SaveChangesAsync();
                    }
                }
                Console.WriteLine("保存完成");
            };

            foreach (var item in downloadModels)
            {
                item.Path = await asyncTask(item);

                // An empty path means the download failed; nothing to record.
                if (!string.IsNullOrWhiteSpace(item.Path))
                {
                    await saveRecordAsync(item);
                }

                Console.WriteLine($"在此休眠{SleepTime / 1000}秒");

                // Asynchronous pause: Thread.Sleep would block the thread running this method.
                await Task.Delay(SleepTime);
            }
        }
 /// <summary>
 /// Creates the spider and, when <paramref name="spiderContext"/> is a
 /// <c>LinkSpiderContext</c>, copies its <c>NextSpiderContexts</c> into <c>NextSpiders</c>.
 /// </summary>
 /// <param name="spiderContext">Context passed on to the base constructor.</param>
 public LinkContextSpider(SpiderContext spiderContext)
     : base(spiderContext)
 {
     // Any other context type (or null) leaves NextSpiders untouched.
     if (spiderContext is LinkSpiderContext)
     {
         LinkSpiderContext linked = (LinkSpiderContext)spiderContext;
         NextSpiders = linked.NextSpiderContexts;
     }
 }
Example #16
0
        /// <summary>
        /// Starts a <c>SpiderOperater</c> against "https://www.baidu.com/" and asserts that
        /// the context reports a successful result.
        /// </summary>
        public void Start()
        {
            SchedulerTest  scheduler = new SchedulerTest();
            SpiderContext  context   = new SpiderContext("https://www.baidu.com/", "");
            SpiderOperater spider    = new SpiderOperater(scheduler, "обть╡Бйт", context);

            spider.Start();

            Assert.AreEqual(true, context.Result.Succeeded);
        }
Example #17
0
 /// <summary>
 /// Loads all WeChat accounts together with their <c>TaskStartSign</c> and
 /// <c>SpiderRoles</c>, ordered by <c>Id</c>.
 /// </summary>
 /// <returns>The accounts as a list.</returns>
 public async Task <List <WeChatAccount> > ListWeChatAccountAsync()
 {
     using (var db = new SpiderContext())
     {
         var accounts = db.WeChatAccounts
                        .Include(account => account.TaskStartSign)
                        .Include(account => account.SpiderRoles)
                        .OrderBy(account => account.Id);

         return await accounts.ToListAsync();
     }
 }
Example #18
0
 /// <summary>
 /// Creates the context for the "cnblogs homepage" spider.
 /// </summary>
 /// <returns>
 /// A context with a task group, a timestamped name, the start URL "http://www.cnblogs.com",
 /// a <c>ConslePipeline</c> and the <c>HomePage</c> entity type.
 /// </returns>
 protected override SpiderContext GetSpiderContext()
 {
     var spiderContext = new SpiderContext();

     spiderContext.SetTaskGroup("cnblogs homepage");

     string runName = "cnblogs homepage " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
     spiderContext.SetSpiderName(runName);

     spiderContext.AddStartUrl("http://www.cnblogs.com");

     var consolePipeline = new ConslePipeline();
     spiderContext.AddPipeline(consolePipeline);

     spiderContext.AddEntityType(typeof(HomePage));

     return spiderContext;
 }
Example #19
0
        /// <summary>
        /// Records the latitude, longitude and name found on <c>context.Data</c> in
        /// <c>locations</c>, then hands over to the base implementation.
        /// </summary>
        /// <param name="context">Context whose <c>Data</c> supplies <c>latitude</c>, <c>longitude</c> and <c>name</c>.</param>
        /// <returns>The task returned by the base <c>SuccessPipelineAsync</c>.</returns>
        protected override Task SuccessPipelineAsync(SpiderContext context)
        {
            // Stored as ((latitude, longitude), name).
            // NOTE(review): the static type of context.Data is not visible here; if it is dynamic,
            // the conversions to double/string happen at run time - confirm.
            locations.Add(Tuple.Create <Tuple <double, double>, string>(Tuple.Create <double, double>(context.Data.latitude, context.Data.longitude), context.Data.name));
            // Disabled debug output:
            //Console.WriteLine(context.Data.latitude);
            //Console.WriteLine(context.Data.longitude);
            //Console.WriteLine(context.Data.name);
            //Console.WriteLine("-------------------------------");

            return(base.SuccessPipelineAsync(context));
        }
Example #20
0
 /// <summary>
 /// Rejects a location once the parent resource has reached the configured maximum depth.
 /// </summary>
 /// <param name="location">Not used by this rule.</param>
 /// <param name="context">Not used by this rule.</param>
 /// <param name="parentSpider">Resource whose <c>Depth</c> is compared with <c>maxDepth</c>.</param>
 /// <returns>
 /// <c>false</c> when <c>maxDepth</c> is positive and the parent's depth is at least
 /// <c>maxDepth</c>; otherwise <c>true</c>.
 /// </returns>
 public bool Accept(Uri location, SpiderContext context, ISpiderResource parentSpider)
 {
     // A non-positive maxDepth disables the check; parentSpider is not read in that case.
     bool depthReached = this.maxDepth > 0 && parentSpider.Depth >= this.maxDepth;

     return !depthReached;
 }
        /// <summary>
        /// Creates the context for the "cnblogs homepage" spider.
        /// </summary>
        /// <returns>
        /// A context with a task group, a timestamped name, the start URL "http://www.cnblogs.com",
        /// a <c>ConslePipeline</c> and the <c>HomePage</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            spiderContext.SetTaskGroup("cnblogs homepage");

            // Timestamp suffix so every run gets its own name.
            string timestamp = DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName("cnblogs homepage " + timestamp);

            spiderContext.AddStartUrl("http://www.cnblogs.com");
            spiderContext.AddPipeline(new ConslePipeline());
            spiderContext.AddEntityType(typeof(HomePage));

            return spiderContext;
        }
Example #22
0
 /// <summary>
 /// Accepts a location unless the depth limit has been reached.
 /// </summary>
 /// <param name="location">Not used by this rule.</param>
 /// <param name="context">Not used by this rule.</param>
 /// <param name="parentSpider">Resource whose <c>Depth</c> is compared with <c>maxDepth</c>.</param>
 /// <returns>
 /// <c>false</c> when <c>maxDepth</c> is positive and the parent's depth is at least
 /// <c>maxDepth</c>; otherwise <c>true</c>.
 /// </returns>
 public bool Accept(Uri location, SpiderContext context, ISpiderResource parentSpider)
 {
     // The limit only applies when maxDepth is positive.
     if (this.maxDepth > 0 && parentSpider.Depth >= this.maxDepth)
     {
         return false;
     }

     return true;
 }
 /// <summary>
 /// Deletes the <c>Product</c> rows matching the catalog link and rank time, and sets
 /// <c>RankTime</c> on the <c>Catalog</c> row with that link, in one SQL command.
 /// </summary>
 /// <param name="RefCatalogLink">Bound to <c>@RefCatalogLink</c>.</param>
 /// <param name="RankTime">Bound to <c>@RankTime</c>.</param>
 public void DeleteProduct(string RefCatalogLink, string RankTime)
 {
     // Plain string literal: the statement has no interpolation holes and all values
     // travel as parameters, so it must not be an interpolated string.
     const string sql = "DELETE FROM Product WHERE RefCatalogLink=@RefCatalogLink AND RankTime=@RankTime; UPDATE Catalog SET RankTime=@RankTime WHERE CatalogLink=@RefCatalogLink;";

     using (DbContext Context = new SpiderContext())
     {
         SQLiteParameter[] sqlParameter = new SQLiteParameter[] {
             new SQLiteParameter("@RefCatalogLink", RefCatalogLink),
             new SQLiteParameter("@RankTime", RankTime)
         };
         Context.Database.ExecuteSqlCommand(sql, sqlParameter);
     }
 }
 /// <summary>
 /// Gives every product a new upper-case GUID string as <c>ID</c>, adds all of them to the
 /// <c>Product</c> set of a new <see cref="SpiderContext"/> and saves through <c>Save</c>.
 /// </summary>
 /// <param name="list">Products to insert; their <c>ID</c> is overwritten.</param>
 public void AddRange(List <Product> list)
 {
     using (DbContext db = new SpiderContext())
     {
         foreach (Product product in list)
         {
             product.ID = Guid.NewGuid().ToString().ToUpper();
         }

         var products = db.Set<Product>();
         products.AddRange(list);
         Save(db);
     }
 }
Example #25
0
        /// <summary>
        /// Clears the <c>TaskStartSign</c> of the WeChat account with the given id and saves.
        /// </summary>
        /// <param name="accountId">Id of the account to update.</param>
        /// <returns>The value returned by <c>SaveChangesAsync</c>.</returns>
        /// <exception cref="NullReferenceException">No account with that id exists.</exception>
        public async Task <int> SetAccountSpiderStop(long accountId)
        {
            using var spiderContext = new SpiderContext();

            // Load the navigation together with the account. When it is not loaded the
            // property is already null, so assigning null below would not register as a change.
            // NOTE(review): assumes lazy loading is not enabled for this context - confirm.
            var account = await spiderContext.WeChatAccounts
                          .Include(c => c.TaskStartSign)
                          .FirstOrDefaultAsync(c => c.Id == accountId);

            if (null == account)
            {
                // Exception type kept as-is for existing callers.
                throw new NullReferenceException("未找到公众号实体");
            }
            account.TaskStartSign = null;
            return(await spiderContext.SaveChangesAsync());
        }
Example #26
0
        /// <summary>
        /// Sets <c>RunStatus</c> to <c>false</c> on the first start sign found for the client
        /// and saves.
        /// </summary>
        /// <param name="clientId">Client whose start sign is updated.</param>
        /// <returns>The value returned by <c>SaveChangesAsync</c>.</returns>
        /// <exception cref="NullReferenceException">No start sign exists for the client.</exception>
        public async Task <int> SetClientSignOut(long clientId)
        {
            using (var db = new SpiderContext())
            {
                var startSign = await db.TaskStartSigns.FirstOrDefaultAsync(s => s.ClientId == clientId);

                if (startSign == null)
                {
                    throw new NullReferenceException("未找到实体");
                }

                startSign.RunStatus = false;

                int affected = await db.SaveChangesAsync();
                return affected;
            }
        }
Example #27
0
 /// <summary>
 /// Inserts a new <c>TaskStartSign</c> for the client: generated id, start date set to the
 /// current time and <c>RunStatus</c> set to <c>true</c>.
 /// </summary>
 /// <param name="clientId">Client the start sign belongs to.</param>
 public async Task InsertClientSgin(long clientId)
 {
     using (var db = new SpiderContext())
     {
         var startSign = new TaskStartSign
         {
             Id        = IdWorkContext.ID_WORKER.NextId(),
             ClientId  = clientId,
             StartDate = DateTime.Now,
             RunStatus = true
         };

         db.TaskStartSigns.Add(startSign);
         await db.SaveChangesAsync();
     }
 }
 /// <summary>
 /// Gives every product a new GUID as <c>ID</c>, adds all of them to the <c>Product</c> set
 /// of a new <see cref="SpiderContext"/> and saves through <c>Save</c>.
 /// </summary>
 /// <remarks>An empty list is accepted; nothing is added in that case.</remarks>
 /// <param name="list">Products to insert; their <c>ID</c> is overwritten.</param>
 public void AddRange(List <Product> list)
 {
     using (DbContext Context = new SpiderContext())
     {
         DbSet <Product> dbSetEntity = Context.Set <Product>();

         // The former unused "first = list[0]" lookup was dropped: it served no purpose
         // and threw ArgumentOutOfRangeException for an empty list.
         list.ForEach(l =>
         {
             l.ID = Guid.NewGuid();
         });
         dbSetEntity.AddRange(list);
         Save(Context);
     }
 }
Example #29
0
        /// <summary>
        /// Rejects a URI whose last path segment contains any entry of <c>namesToExclude</c>.
        /// </summary>
        /// <remarks>
        /// Null or empty entries are ignored. The match is an ordinal, case-sensitive
        /// substring search.
        /// </remarks>
        /// <param name="uri">Candidate URI; only its last segment is inspected.</param>
        /// <param name="context">Not used by this rule.</param>
        /// <param name="parentSpider">Not used by this rule.</param>
        /// <returns><c>false</c> when an excluded name occurs in the last segment; otherwise <c>true</c>.</returns>
        public bool Accept(Uri uri, SpiderContext context, ISpiderResource parentSpider)
        {
            string[] segments = uri.Segments;
            string   fileName = segments[segments.Length - 1];

            for (int i = 0; i < namesToExclude.Length; i++)
            {
                string excluded = namesToExclude[i];

                // IndexOf("") is always 0, so an empty entry would reject every URI.
                if (string.IsNullOrEmpty(excluded))
                {
                    continue;
                }

                // Ordinal search: file names must not be matched with culture rules.
                if (fileName.IndexOf(excluded, StringComparison.Ordinal) >= 0)
                {
                    return false;
                }
            }

            return true;
        }
        /// <summary>
        /// Accepts a URI unless its last path segment contains one of the entries of
        /// <c>namesToExclude</c>.
        /// </summary>
        /// <remarks>
        /// Null or empty entries are skipped. Matching is an ordinal, case-sensitive
        /// substring search.
        /// </remarks>
        /// <param name="uri">Candidate URI; only its last segment is inspected.</param>
        /// <param name="context">Not used by this rule.</param>
        /// <param name="parentSpider">Not used by this rule.</param>
        /// <returns><c>true</c> when no excluded name occurs in the last segment.</returns>
        public bool Accept(Uri uri, SpiderContext context, ISpiderResource parentSpider)
        {
            string[] segments = uri.Segments;
            string   fileName = segments[segments.Length - 1];

            foreach (string excluded in namesToExclude)
            {
                // An empty entry is found at index 0 of any string and would reject everything.
                if (string.IsNullOrEmpty(excluded))
                {
                    continue;
                }

                // Ordinal comparison keeps the result independent of the current culture.
                if (fileName.IndexOf(excluded, StringComparison.Ordinal) >= 0)
                {
                    return(false);
                }
            }

            return(true);
        }
Example #31
0
        /// <summary>
        /// Creates the context for the "JD sku/store test" spider with database-prepared start URLs.
        /// </summary>
        /// <returns>
        /// A context with a task group, a timestamped name, one target URL extractor, a MySQL
        /// pipeline, a Redis scheduler, one start URL, a <c>ConfigurableDbPrepareStartUrls</c>
        /// reading the "url" column of "jd.category", and the <c>Product</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            spiderContext.SetTaskGroup("JD sku/store test");

            string runName = "JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type       = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            spiderContext.AddTargetUrlExtractor(pagingExtractor);

            // NOTE(review): the connection credentials are embedded in source.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            var redisScheduler = new RedisScheduler
            {
                Host     = "ooodata.com",
                Password = "******",
                Port     = 6379
            };
            spiderContext.SetScheduler(redisScheduler);

            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            spiderContext.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=1", startUrlData);

            var prepareStartUrls = new ConfigurableDbPrepareStartUrls()
            {
                Source        = DataSource.MySql,
                ConnectString = "Database='test';Data Source= ooodata.com;User ID=root;Password=1qazZAQ!123456;Port=4306",
                TableName     = "jd.category",
                Columns       = new List<BaseDbPrepareStartUrls.Column>
                {
                    new BaseDbPrepareStartUrls.Column
                    {
                        Name       = "url",
                        // The column value has ".html" replaced by an empty string.
                        Formatters = new List<Formatter>
                        {
                            new ReplaceFormatter { OldValue = ".html", NewValue = "" }
                        }
                    }
                },
                FormateStrings = new List<string> { "{0}&page=1&JL=6_0_0" }
            };
            spiderContext.AddPrepareStartUrls(prepareStartUrls);

            spiderContext.AddEntityType(typeof(Product));
            return spiderContext;
        }
 /// <summary>
 /// Returns the stored catalog with the same <c>RID</c> as <paramref name="entity"/>, or
 /// inserts <paramref name="entity"/> with a new upper-case GUID string as <c>ID</c>.
 /// </summary>
 /// <param name="entity">Catalog to look up by <c>RID</c> and insert when missing.</param>
 /// <returns>The existing catalog when one is found; otherwise <paramref name="entity"/>.</returns>
 public Catalog AddIfNoExist(Catalog entity)
 {
     using (DbContext db = new SpiderContext())
     {
         var catalogs = db.Set<Catalog>();

         Catalog existing = catalogs.FirstOrDefault(o => o.RID == entity.RID);
         if (existing != null)
         {
             return existing;
         }

         entity.ID = Guid.NewGuid().ToString().ToUpper();
         catalogs.Add(entity);
         Save(db);
         return entity;
     }
 }
Example #33
0
        /// <summary>
        /// Sets <c>StartDate</c> to the current time on the first start sign found for the
        /// client and saves.
        /// </summary>
        /// <param name="clientId">Client whose start sign is refreshed.</param>
        /// <exception cref="NullReferenceException">No start sign exists for the client.</exception>
        public async Task UpdateClientSgin(long clientId)
        {
            using (var db = new SpiderContext())
            {
                var startSign = await db.TaskStartSigns.FirstOrDefaultAsync(s => s.ClientId == clientId);

                // Fail first; the update path follows without nesting.
                if (startSign == null)
                {
                    throw new NullReferenceException("未找到实体");
                }

                startSign.StartDate = DateTime.Now;
                await db.SaveChangesAsync();
            }
        }
Example #34
0
        /// <summary>
        /// Attaches every article in <paramref name="list"/> to the WeChat account with the
        /// given id, adds the articles to the context and saves.
        /// </summary>
        /// <param name="list">Articles to insert; their <c>WeChatAccount</c> is overwritten.</param>
        /// <param name="accountId">Id of the owning account.</param>
        /// <returns>The value returned by <c>SaveChangesAsync</c>.</returns>
        /// <exception cref="NullReferenceException">No account with that id exists.</exception>
        public async Task <int> InsertArticleByList(List <WeChatArticle> list, long accountId)
        {
            using (var db = new SpiderContext())
            {
                var owner = await db.WeChatAccounts.FirstOrDefaultAsync(a => a.Id == accountId);

                if (owner == null)
                {
                    throw new NullReferenceException("未找到公众号实体");
                }

                foreach (WeChatArticle article in list)
                {
                    article.WeChatAccount = owner;
                    db.WeChatArticles.Add(article);
                }

                return await db.SaveChangesAsync();
            }
        }
Example #35
0
 /// <summary>
 /// Returns the stored catalog with the same <c>CatalogLink</c> as <paramref name="entity"/>,
 /// or inserts <paramref name="entity"/> with a new GUID and the current time as <c>CreateTime</c>.
 /// </summary>
 /// <param name="entity">Catalog to look up by <c>CatalogLink</c> and insert when missing.</param>
 /// <returns>The existing catalog when one is found; otherwise <paramref name="entity"/>.</returns>
 public Catalog AddIfNoExist(Catalog entity)
 {
     using (DbContext db = new SpiderContext())
     {
         var catalogs = db.Set<Catalog>();

         Catalog existing = catalogs.FirstOrDefault(o => o.CatalogLink == entity.CatalogLink);
         if (existing != null)
         {
             return existing;
         }

         // Not stored yet: stamp identity and creation time, then insert.
         entity.ID         = Guid.NewGuid();
         entity.CreateTime = DateTime.Now;
         catalogs.Add(entity);
         Save(db);
         return entity;
     }
 }
        /// <summary>
        /// Creates the context for the "JD sku/store test" spider.
        /// </summary>
        /// <returns>
        /// A context with a timestamped spider name, one target URL extractor, a MySQL pipeline,
        /// one start URL and the <c>Product</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            string runName = "JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            var pagingExtractor = new Extension.Configuration.TargetUrlExtractor
            {
                Region = new Extension.Configuration.Selector
                {
                    Type       = ExtractType.XPath,
                    Expression = "//span[@class=\"p-num\"]"
                },
                Patterns = new List<string> { @"&page=[0-9]+&" }
            };
            spiderContext.AddTargetUrlExtractor(pagingExtractor);

            // Data Source and Password are blank in this connection string.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            var startUrlData = new Dictionary<string, object>
            {
                { "name", "手机" },
                { "cat3", "655" }
            };
            spiderContext.AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", startUrlData);

            spiderContext.AddEntityType(typeof(Product));

            return spiderContext;
        }
        /// <summary>
        /// Creates the spider for <paramref name="spiderContext"/>: builds the context when it
        /// has not been built yet, derives <c>Name</c> and <c>Logger</c> from it and then calls
        /// <c>InitEnvoriment</c>.
        /// </summary>
        /// <param name="spiderContext">Context stored in <c>SpiderContext</c>.</param>
        public ModelSpider(SpiderContext spiderContext)
        {
            #if NET_CORE
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            #endif
            SpiderContext = spiderContext;

            if (SpiderContext.IsBuilt == false)
            {
                SpiderContext.Build();
            }

            // Read after Build() so the values reflect the built context.
            var userId     = SpiderContext.UserId;
            var spiderName = SpiderContext.SpiderName;

            Name   = $"{userId}-{spiderName}";
            Logger = LogUtils.GetLogger(spiderName, userId, SpiderContext.TaskGroup);

            InitEnvoriment();
        }
Example #38
0
        /// <summary>
        /// Creates a resource for <paramref name="location"/> and registers a download for it
        /// with the <c>DownloadManager</c>.
        /// </summary>
        /// <param name="context">Spider context this resource belongs to.</param>
        /// <param name="parent">Parent resource.</param>
        /// <param name="location">Location of the resource.</param>
        public WebSpiderResource(SpiderContext context, ISpiderResource parent, string location)
        {
            this.parent = parent;
            this.location = location;
            this.context = context;

            UpdateDepth();

            // The local target is resolved before the download is registered.
            string targetFile = GetLocalFile();

            download = DownloadManager.Instance.Add(ResourceLocation.FromURL(this.Location), null, targetFile, 1, false);

            // Method group conversion creates the same EventHandler delegate.
            download.StateChanged += download_StateChanged;
        }
Example #39
0
        /// <summary>
        /// Initialises the resource, updates its depth and adds a download for its location
        /// to the <c>DownloadManager</c>, subscribing to the download's state changes.
        /// </summary>
        /// <param name="context">Spider context this resource belongs to.</param>
        /// <param name="parent">Parent resource.</param>
        /// <param name="location">Location of the resource.</param>
        public WebSpiderResource(SpiderContext context, ISpiderResource parent, string location)
        {
            this.context  = context;
            this.parent   = parent;
            this.location = location;

            UpdateDepth();

            string destination = GetLocalFile();

            download = DownloadManager.Instance.Add(
                ResourceLocation.FromURL(this.Location), null, destination, 1, false);

            EventHandler onStateChanged = download_StateChanged;
            download.StateChanged += onStateChanged;
        }
        /// <summary>
        /// Creates the context for the "ddeng.com" spider.
        /// </summary>
        /// <returns>
        /// A context with a timestamped spider name, a site definition (cookie, headers,
        /// user agent, accept header), a MySQL pipeline, one start URL and the <c>Corp</c> entity type.
        /// </returns>
        protected override SpiderContext GetSpiderContext()
        {
            var spiderContext = new SpiderContext();

            string runName = "ddeng.com " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
            spiderContext.SetSpiderName(runName);

            var site = new Site
            {
                Cookie = "sid=dea284fc36c24e8cbcd447343d7b8a4e; sn=DD962248; ctid=000000; ctnm=%E5%8F%A4%E9%95%87%E7%81%AF%E9%A5%B0%E6%89%B9%E5%8F%91; ctpv=%E5%B9%BF%E4%B8%9C; JSESSIONID=acbBqFfOD4I63d9PziDvv; DDENG=c4fc08ae2e3ba3efeddbc667c2f45e615a85e80009169501dc244a03e87908aa61146548b97ed9c7dc07af23bfd80bff5008f8c8867a9165d4bd2732aca0db7dedae2e042d3968fcad1150f36be242e8a32a3f59db2a0b39216a59f1628508c5799644532a9d99925f9841b3c13a1f97; userId=10003379; previousUser=%E5%A4%95%E7%8E%89; Hm_lvt_9e33f153f28be198970d205d90a24f28=1466146335; Hm_lpvt_9e33f153f28be198970d205d90a24f28=1466146392; Hm_lvt_54b4cb498afd05463ab4611b38a6f289=1466146335; Hm_lpvt_54b4cb498afd05463ab4611b38a6f289=1466146392; CNZZDATA1256982382=395301521-1466143554-%7C1466143554",
                Headers = new Dictionary<string, string>
                {
                    { "Cache-Control", "max-age=0" },
                    { "Upgrade-Insecure-Requests", "1" }
                },
                UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
                Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
            };
            spiderContext.SetSite(site);

            // Data Source and Password are blank in this connection string.
            var mysqlPipeline = new MysqlPipeline
            {
                ConnectString = "Database='test';Data Source=;User ID=root;Password=;Port=4306"
            };
            spiderContext.AddPipeline(mysqlPipeline);

            spiderContext.AddStartUrl("http://www.ddeng.com/product/967659");
            spiderContext.AddEntityType(typeof(Corp));

            return spiderContext;
        }
Example #41
0
        /// <summary>
        /// Click handler that starts a site download: validates the entered location, makes
        /// sure the auto-downloads extension is active, builds a <see cref="SpiderContext"/>
        /// with the naming rules selected in the form and starts the spider.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void btnDownloadSite_Click(object sender, EventArgs e)
        {
            ResourceLocation target = this.location1.ResourceLocation;

            target.BindProtocolProviderType();

            if (target.ProtocolProviderType == null)
            {
                MessageBox.Show(
                    "Invalid URL format, please check the location field.",
                    AppManager.Instance.Application.MainForm.Text,
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Error);
                return;
            }

            AutoDownloadsExtension autoDownloads =
                (AutoDownloadsExtension)AppManager.Instance.Application.GetExtensionByType(typeof(AutoDownloadsExtension));

            if (!autoDownloads.Active)
            {
                // Let the user apply the settings offered by the form.
                using (StartAutoDownloadsForm prompt = new StartAutoDownloadsForm())
                {
                    if (prompt.ShowDialog() == DialogResult.Yes)
                    {
                        prompt.ScheduledDownloadEnabler.ApplySettings();
                    }
                }

                // Still inactive after the dialog: do not start the spider.
                if (!autoDownloads.Active)
                {
                    return;
                }
            }

            SpiderContext spiderCntx = new SpiderContext(folderBrowser1.Folder, target.URL);

            // The base-location rule is always present; the others depend on the form's controls.
            BaseLocationNamingRule baseRule =
                new BaseLocationNamingRule(chkImagesFromExternal.Checked, chkResourcesFromExternal.Checked);
            spiderCntx.NamingRules.Add(baseRule);

            if (chkExcludeExtensions.Checked)
            {
                string[] extensions = txtExcludeExt.Text.Split(',');
                spiderCntx.NamingRules.Add(new ExcludeExtensionNamingRule(extensions));
            }

            if (chkExcludeNames.Checked)
            {
                string[] names = txtExcludeNames.Text.Split(',');
                spiderCntx.NamingRules.Add(new ExcludeNameNamingRule(names));
            }

            if (numDownloadDepth.Value > 0)
            {
                int depth = (int)numDownloadDepth.Value;
                spiderCntx.NamingRules.Add(new MaxDepthNamingRule(depth));
            }

            if (chkRemoveCompleted.Checked)
            {
                // The handler ignores its arguments and clears ended downloads.
                spiderCntx.ResourceCompleted += delegate
                {
                    DownloadManager.Instance.ClearEnded();
                };
            }

            siteSpider = new Spider(spiderCntx, new WebSpiderResourceFactory());
            siteSpider.Completed += siteSpider_Completed;
            siteSpider.Aborted += siteSpider_Aborted;
            siteSpider.BeginLookup();

            EnterDownloadMode(true);
        }
Example #42
0
 /// <summary>
 /// Spider completion handler: asynchronously invokes <c>EnterDownloadMode(false)</c>
 /// through <c>BeginInvoke</c>.
 /// </summary>
 /// <param name="cntx">Context of the spider that completed; not used.</param>
 void siteSpider_Completed(SpiderContext cntx)
 {
     MethodInvoker leaveDownloadMode = delegate { EnterDownloadMode(false); };

     this.BeginInvoke(leaveDownloadMode);
 }
 /// <summary>
 /// Creates a <c>WebSpiderResource</c> for the given location.
 /// </summary>
 /// <param name="cntx">Spider context passed to the resource.</param>
 /// <param name="parent">Parent resource.</param>
 /// <param name="location">Location of the new resource.</param>
 /// <returns>The new resource.</returns>
 public ISpiderResource CreateSpider(SpiderContext cntx, ISpiderResource parent, string location)
 {
     ISpiderResource resource = new WebSpiderResource(cntx, parent, location);

     return resource;
 }
Example #44
0
 /// <summary>
 /// Creates the processor, keeping the context and taking <c>Site</c> from it.
 /// </summary>
 /// <param name="spiderContext">Context to keep; its <c>Site</c> is copied to <c>Site</c>.</param>
 public EntityProcessor(SpiderContext spiderContext)
 {
     var site = spiderContext.Site;

     Site           = site;
     _spiderContext = spiderContext;
 }