예제 #1
0
        /// <summary>
        /// Registers every page image of the currently loaded detail page with the uploader.
        /// </summary>
        /// <remarks>
        /// The image sources are the values returned for the <c>#pageSelect option@value</c>
        /// query; category name and content title are passed as empty strings.
        /// </remarks>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="webDriver">Driver that has already navigated to the detail page.</param>
        private async Task SetDetailPageTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            // The former cast to PhantomJSDriver was never used and only made the method
            // throw InvalidCastException for any other IWebDriver implementation.
            var htmlElement = webDriver.FindElement(By.TagName("html"));
            var imgList     = htmlElement.FindMultiValueByCss("#pageSelect option@value");

            // Nothing is awaited here; the method stays async so failures are still
            // reported through the returned Task, exactly as before.
            for (int imgIndex = 0; imgIndex < imgList.Count; imgIndex++)
            {
                uploderCreator.SetContentIImg("", "", imgList[imgIndex], imgIndex);
            }
        }
예제 #2
0
        /// <summary>
        /// Handles a "main" page: walks every anchor on the page and queues follow-up tasks.
        /// </summary>
        /// <remarks>
        /// Links that contain neither "/news" nor "/manhua" (case-insensitive) are ignored.
        /// A link matching the manga category pattern is queued as "category", one matching
        /// the manga detail pattern as "detail", and anything else as another "main" task.
        /// The handlers for "category" and "detail" are registered on first use.
        /// </remarks>
        /// <param name="spiderLog">The crawl task being processed.</param>
        /// <param name="webDriver">Driver positioned on the page; may be null.</param>
        static async Task SetCategoryMainTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            // The dot is escaped so that only a literal ".html" suffix matches.
            var manhuaCategoryRegex = new Regex(@"Manhua/\d+\.html");
            var manhuaDetailRegex   = new Regex(@"Manhua/\d+_\d+\.html");

            if (webDriver == null)
            {
                spiderLog.ReloadSuccess = false;
                await TaskExutor.Instance.UpdateTask(spiderLog);
                // Without a driver there is nothing to scan; previously execution fell
                // through and crashed with a NullReferenceException on the next line.
                return;
            }

            var html = webDriver.FindElement(By.TagName("body")).GetAttribute("innerHTML");

            Console.WriteLine(html);
            var aList = webDriver.FindElements(By.TagName("a"));

            foreach (var aElement in aList)
            {
                var url = aElement.GetAttribute("href");
                Console.WriteLine(url);
                if (string.IsNullOrEmpty(url))
                {
                    continue;
                }

                var lowerUrl = url.ToLower();
                if (!lowerUrl.Contains("/news") && !lowerUrl.Contains("/manhua"))
                {
                    continue;
                }

                // Manga category (series) page.
                var isManhuaCategoryPage = manhuaCategoryRegex.IsMatch(url);
                if (isManhuaCategoryPage)
                {
                    await TaskExutor.Instance.SetTask(url, "category");

                    if (!TaskExutor.Instance.SpiderTaskDelegate.ContainsKey("category"))
                    {
                        TaskExutor.Instance.SpiderTaskDelegate.Add("category", SetCategoryTask);
                    }
                }

                // Manga detail (chapter) page.
                var isManhuaDetailPage = manhuaDetailRegex.IsMatch(url);
                if (isManhuaDetailPage)
                {
                    await TaskExutor.Instance.SetTaskWithParentUrl(url, spiderLog.TargetUrl, spiderLog.ParentName, spiderLog.Order, "detail");

                    if (!TaskExutor.Instance.SpiderTaskDelegate.ContainsKey("detail"))
                    {
                        TaskExutor.Instance.SpiderTaskDelegate.Add("detail", SetDetailTask);
                    }
                }

                // Anything else is treated as another listing page to scan.
                if (!isManhuaCategoryPage && !isManhuaDetailPage)
                {
                    await TaskExutor.Instance.SetTask(url, "main");
                }
            }
        }
예제 #3
0
파일: SpiderJob.cs 프로젝트: jilumvc/Sajoo
        /// <summary>
        /// Runs one crawl pass over every spider template selected by "IsDelete=0" and then
        /// stores the collected error lines as a single <c>SpiderLog</c> row.
        /// </summary>
        /// <remarks>
        /// After each template the job sleeps for a random duration taken from
        /// <c>SpiderConfig.SuspendFrom</c>..<c>SpiderConfig.SuspendTo</c> (the original note
        /// describes this as a 2~6 second pause). A log row is inserted even when no error
        /// line was collected.
        /// </remarks>
        public void Execute()
        {
            List<SpiderTemplate> templates = SpiderTemplate.find( "IsDelete=0" ).list();
            DbContext.closeConnectionAll();

            logger.Info( "begin SpiderJob=" + templates.Count );

            StringBuilder crawlOutput = new StringBuilder();
            foreach (SpiderTemplate template in templates) {

                ISpiderTool tool = getSpider( template );
                int[] pauseRange = new int[] { SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo };

                tool.DownloadPage( template, crawlOutput, pauseRange );
                DbContext.closeConnectionAll();

                // Pause between templates.
                Thread.Sleep( rd.Next( SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo ) );
            }

            // Keep only the lines that start with "error=" once trimmed.
            StringBuilder errors = new StringBuilder();
            foreach (String rawLine in crawlOutput.ToString().Split( '\n' )) {
                String line = rawLine.Trim();
                if (line.StartsWith( "error=" )) {
                    errors.AppendLine( line );
                }
            }

            SpiderLog record = new SpiderLog();
            record.Msg = errors.ToString();
            record.insert();
            DbContext.closeConnectionAll();
        }
예제 #4
0
        /// <summary>
        /// Handles a category page: creates the category through the uploader, then creates
        /// one content entry and queues one "dongmanzhijiaDetailPage" task per chapter link.
        /// </summary>
        /// <remarks>
        /// Content entries are numbered upwards from 0 in page order, while the order passed
        /// to the queued tasks counts down from the number of links to 1.
        /// </remarks>
        /// <param name="spiderLog">The crawl task; its TargetUrl is passed on as parent URL.</param>
        /// <param name="webDriver">Driver positioned on the category page.</param>
        private async Task SetCategoryPageTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            var page = webDriver.FindElement(By.TagName("html"));

            var categoryTitle = page.FindValueByCss(".anim_title_text a@text");
            var coverSrc      = page.FindValueByCss(".anim_intro_ptext img@src");
            var categoryTags  = FilterTagsTmpArr(page.FindMultiValueByCss(".anim-main_list td@text"));

            categoryTags.Add("动漫之家");
            uploderCreator.CreateCategory(categoryTitle, coverSrc, categoryTags.ToArray());

            var taskQueue    = TaskExutor.Instance;
            var chapterLinks = page.FindElements(By.CssSelector(".cartoon_online_border a"));
            var chapterCount = chapterLinks.Count;

            for (var position = 0; position < chapterCount; position++)
            {
                var link     = chapterLinks[position];
                var href     = link.GetAttribute("href");
                var rawTitle = link.GetAttribute("innerText");

                // Remove "<digits>p" fragments from the link text.
                var cleanTitle = Regex.Replace(rawTitle, @"\d+p", string.Empty);

                uploderCreator.CreateContent(cleanTitle, categoryTitle, position);
                await taskQueue.SetTaskWithParentUrl(href, spiderLog.TargetUrl, categoryTitle, chapterCount - position, "dongmanzhijiaDetailPage");
            }
        }
예제 #5
0
        /// <summary>
        /// Handles a category page: creates the category through the uploader, then creates
        /// one content entry and queues one "manhuaguiDetailPage" task per chapter link.
        /// </summary>
        /// <remarks>
        /// Content entries are numbered upwards from 0 in page order, while the order passed
        /// to the queued tasks counts down from the number of links to 1.
        /// </remarks>
        /// <param name="spiderLog">The crawl task; its TargetUrl is passed on as parent URL.</param>
        /// <param name="webDriver">Driver positioned on the category page.</param>
        private async Task SetCategoryPageTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            var page = webDriver.FindElement(By.TagName("html"));

            var bookTitle = page.FindValueByCss(".book-title@text");
            var coverSrc  = page.FindValueByCss(".hcover img@src");
            var bookTags  = FilterTagsTmpArr(page.FindMultiValueByCss(".detail-list li span@text"));

            bookTags.Add("漫画柜");
            uploderCreator.CreateCategory(bookTitle, coverSrc, bookTags.ToArray());

            var taskQueue    = TaskExutor.Instance;
            var chapterLinks = page.FindElements(By.CssSelector(".chapter-list a"));
            var chapterCount = chapterLinks.Count;

            for (var position = 0; position < chapterCount; position++)
            {
                var link     = chapterLinks[position];
                var href     = link.GetAttribute("href");
                var rawTitle = link.GetAttribute("innerText");

                // Remove "<digits>p" fragments from the link text.
                var cleanTitle = Regex.Replace(rawTitle, @"\d+p", string.Empty);

                uploderCreator.CreateContent(cleanTitle, bookTitle, position);
                await taskQueue.SetTaskWithParentUrl(href, spiderLog.TargetUrl, bookTitle, chapterCount - position, "manhuaguiDetailPage");
            }
        }
예제 #6
0
        /// <summary>
        /// Handles a category page: reads the page count, title and cover image, derives an
        /// image URL template from the cover URL and hands everything to the content creator.
        /// </summary>
        /// <remarks>
        /// The image template is the cover URL with its last path segment replaced by
        /// <c>yaoqi{0}.jpg</c>. Throws <see cref="FormatException"/> when the pager text does
        /// not reduce to an integer.
        /// </remarks>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="webDriver">Driver positioned on the category page.</param>
        private async Task YaoqimanhuaCategory(SpiderLog spiderLog, IWebDriver webDriver)
        {
            var contentCreator = new ContentSpiderCreator();

            // The pager text wraps the page count in "共" / "页" / ":"; strip them.
            var totalCountStr = webDriver.FindElement(By.CssSelector(".showpage a")).GetAttribute("innerText");
            totalCountStr = totalCountStr.Replace("共", "").Replace("页", "").Replace(":", "");

            var name     = webDriver.FindElement(By.CssSelector(".ptitle.fc1")).GetAttribute("innerText");
            var coverImg = webDriver.FindElement(By.CssSelector("#imgString img")).GetAttribute("src");

            var totalCount = int.Parse(totalCountStr);

            // Keep everything up to and including the last '/', then append the file pattern.
            // With no '/' present LastIndexOf returns -1 and the prefix is empty, which matches
            // the former Split/Reverse/Skip/Reverse/Append/Join chain.
            var directoryEnd        = coverImg.LastIndexOf('/') + 1;
            var imgResourceTemplate = coverImg.Substring(0, directoryEnd) + "yaoqi{0}.jpg";

            contentCreator.CreateNewCategoryAndContentAndLoadImg("全部", name, 0, imgResourceTemplate, totalCount);
        }
예제 #7
0
        /// <summary>
        /// Queues a "manhuiguiCategoryPage" task for every value returned by the
        /// <c>a.bcover@href</c> query inside the <c>#contList</c> container.
        /// </summary>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="webDriver">Driver positioned on the list page.</param>
        private async Task SetListPageTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            var listContainer = webDriver.FindElement(By.CssSelector("#contList"));
            var taskQueue     = TaskExutor.Instance;
            var categoryLinks = listContainer.FindMultiValueByCss("a.bcover@href");

            for (var i = 0; i < categoryLinks.Count; i++)
            {
                await taskQueue.SetTask(categoryLinks[i], "manhuiguiCategoryPage");
            }
        }
예제 #8
0
        /// <summary>
        /// Handles a news detail page: reads title, time and article body from the page and
        /// stores them as a new content entry unless an entry with the same title exists.
        /// </summary>
        /// <remarks>
        /// The first image inside the article body, if any, is attached as the entry's media
        /// resource. Throws <see cref="FormatException"/> when the time text cannot be parsed.
        /// </remarks>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="driver">Driver positioned on the news detail page.</param>
        /// <returns>A task that completes once the entry has been saved or skipped.</returns>
        static async Task SetNewsDetailTask(SpiderLog spiderLog, IWebDriver driver)
        {
            IWebElement webElement = driver.FindElement(By.TagName("html"));
            var         title      = webElement.FindElement(By.CssSelector(".content .article-title")).GetAttribute("innerText");
            var         time       = webElement.FindElement(By.CssSelector(".content .article-info .time")).GetAttribute("innerText");
            var         article    = webElement.FindElement(By.CssSelector(".content .article-content"));
            var         content    = article.GetAttribute("innerHTML");

            // FindElement throws when nothing matches, so the old "?." never produced null
            // and image-less articles failed. FindElements returns an empty collection.
            var images = article.FindElements(By.TagName("img"));
            var img    = images.Count > 0 ? images[0].GetAttribute("src") : null;

            var dbCtx    = GetDbContext();
            var dateTime = DateTime.Parse(time);

            Console.WriteLine("process : :" + title);
            var cateNews = GetTopCategory(dbCtx, "资讯");

            var hasContent = dbCtx.ContentEntry.Any(x => x.Title == title);

            if (!hasContent)
            {
                var contentEntry = new ContentEntry
                {
                    Title      = title,
                    CreateTime = dateTime,
                    Id         = Guid.NewGuid(),
                    Content    = content,
                    Category   = cateNews,
                    Tags       = new List <Tags>
                    {
                        new Tags
                        {
                            Id   = Guid.NewGuid(),
                            Name = "资讯"
                        },
                        new Tags
                        {
                            Id   = Guid.NewGuid(),
                            Name = "梦域动漫"
                        }
                    }
                };
                if (img != null)
                {
                    contentEntry.MediaResource = new List <FileEntry> {
                        new FileEntry {
                            ActualPath = img,
                            CreateTime = dateTime,
                            Id         = Guid.NewGuid(),
                            Name       = "tmppath"
                        }
                    };
                }
                await dbCtx.ContentEntry.AddAsync(contentEntry);

                await dbCtx.SaveChangesAsync();
            }
        }
예제 #9
0
        /// <summary>
        /// Queues a "YaoqimanhuaCategory" task for the href of every <c>.pic a</c> anchor
        /// on the current page.
        /// </summary>
        /// <param name="spider">The crawl task being processed (not read here).</param>
        /// <param name="webDriver">Driver positioned on the main page.</param>
        private async Task YaoqimanhuaMain(SpiderLog spider, IWebDriver webDriver)
        {
            var taskQueue = TaskExutor.Instance;
            var anchors   = webDriver.FindElements(By.CssSelector(".pic a"));

            for (var i = 0; i < anchors.Count; i++)
            {
                await taskQueue.SetTask(anchors[i].GetAttribute("href"), "YaoqimanhuaCategory");
            }
        }
예제 #10
0
        /// <summary>
        /// Queues a "dongmanzhijiaCategoryPage" task for every value returned by the
        /// <c>.pic a@href</c> query inside the first <c>.column</c> element.
        /// </summary>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="webDriver">Driver positioned on the list page.</param>
        private async Task SetListPageTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            var column        = webDriver.FindElement(By.CssSelector(".column"));
            var taskQueue     = TaskExutor.Instance;
            var categoryLinks = column.FindMultiValueByCss(".pic a@href");

            for (var i = 0; i < categoryLinks.Count; i++)
            {
                await taskQueue.SetTask(categoryLinks[i], "dongmanzhijiaCategoryPage");
            }
        }
예제 #11
0
        /// <summary>
        /// Handles a news list page: queues a "zixunDetail" task for the href of every
        /// <c>.content h2 a</c> anchor.
        /// </summary>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="driver">Driver positioned on the news list page.</param>
        /// <returns>A task that completes once every link has been queued.</returns>
        static async Task SetNewsListTask(SpiderLog spiderLog, IWebDriver driver)
        {
            IWebElement root = driver.FindElement(By.TagName("html"));
            var headlineLinks = root.FindElements(By.CssSelector(".content h2 a"));
            var taskQueue     = TaskExutor.Instance;

            for (var i = 0; i < headlineLinks.Count; i++)
            {
                var detailUrl = headlineLinks[i].GetAttribute("href");
                await taskQueue.SetTask(detailUrl, "zixunDetail");
            }
        }
예제 #12
0
        /// <summary>
        /// Processes crawl tasks one after another until no task is left.
        /// </summary>
        /// <remarks>
        /// Each task gets a fresh PhantomJS runner. When a handler is registered for the
        /// task's type the driver navigates to the target URL and the handler is awaited.
        /// An exception from the handler is written to the console and recorded as
        /// <c>ReloadSuccess = false</c>; it does not stop the loop.
        /// <c>TaskIsRunning</c> is cleared when the method exits, whether normally or by
        /// exception.
        /// </remarks>
        /// <param name="spiderLog">
        /// Task to start with. When null, the first task is obtained from <c>NextTask</c>,
        /// passing <c>currentId</c> if it is at least 1.
        /// </param>
        private async Task Run(SpiderLog spiderLog = null)
        {
            try
            {
                if (spiderLog == null)
                {
                    if (currentId < 1)
                    {
                        spiderLog = await NextTask();
                    }
                    else
                    {
                        spiderLog = await NextTask(currentId);
                    }
                }

                while (spiderLog != null)
                {
                    var runner    = new PhantomJsRunner(null, "", "");
                    var webDriver = runner.WebDriver;
                    var hasErr    = false;
                    try
                    {
                        var needProcess = SpiderTaskDelegate.ContainsKey(spiderLog.Type);
                        if (needProcess)
                        {
                            webDriver.Navigate().GoToUrl(spiderLog.TargetUrl);
                            await SpiderTaskDelegate[spiderLog.Type](spiderLog, webDriver);
                        }
                    }
                    catch (Exception exc)
                    {
                        Console.WriteLine(exc.Message);
                        hasErr = true;
                    }
                    finally
                    {
                        // Always shut the browser down, even if reporting the error fails.
                        webDriver.Quit();
                    }

                    currentId = spiderLog.Id;

                    // Throttle between tasks without blocking a thread.
                    await Task.Delay(speedRun);

                    spiderLog.ReloadSuccess = !hasErr;
                    await UpdateTask(spiderLog);

                    spiderLog = await NextTask(currentId);
                }
            }
            finally
            {
                // Previously skipped when NextTask/UpdateTask threw, leaving the flag stuck.
                TaskIsRunning = false;
            }
        }
예제 #13
0
        /// <summary>
        /// Handles a detail page: stores the page's images as a new content entry under the
        /// category named by <c>spiderLog.ParentName</c>, unless an entry with the same title
        /// already exists in that category.
        /// </summary>
        /// <param name="spiderLog">The crawl task; ParentName selects the category and Order is copied to the entry.</param>
        /// <param name="driver">Driver positioned on the detail page.</param>
        /// <returns>A task that completes once the entry has been saved or skipped.</returns>
        /// <exception cref="InvalidOperationException">No category named <c>spiderLog.ParentName</c> exists.</exception>
        static async Task SetDetailTask(SpiderLog spiderLog, IWebDriver driver)
        {
            var         mainDbContext = GetDbContext();
            IWebElement webElement    = driver.FindElement(By.TagName("html"));
            var         imgList       = webElement.FindElements(By.CssSelector(".r_img img"));
            var         detailTitle   = webElement.FindElement(By.CssSelector(".fl.r_tab_l span")).GetAttribute("innerText");

            var category = await mainDbContext.Categories.FirstOrDefaultAsync(x => x.Name == spiderLog.ParentName);

            if (category == null)
            {
                // Previously this surfaced as a bare NullReferenceException on category.Id.
                throw new InvalidOperationException(
                    "Category '" + spiderLog.ParentName + "' not found; cannot store detail page '" + detailTitle + "'.");
            }

            var categoryId    = category.Id;
            var exitedContent = await mainDbContext.ContentEntry.AnyAsync(x => x.Title == detailTitle && x.Category.Id == categoryId);

            if (!exitedContent)
            {
                // One timestamp so the entry and all of its images agree.
                var createdAt = DateTime.Now;

                var medieaResources = new List <FileEntry>();
                for (int imgIndex = 0; imgIndex < imgList.Count; imgIndex++)
                {
                    medieaResources.Add(new FileEntry
                    {
                        ActualPath = imgList[imgIndex].GetAttribute("src"),
                        CreateTime = createdAt,
                        Id         = Guid.NewGuid(),
                        Name       = "tmpname",
                        Order      = imgIndex
                    });
                }

                var content = new ContentEntry
                {
                    Id            = Guid.NewGuid(),
                    CreateTime    = createdAt,
                    Category      = category,
                    MediaResource = medieaResources,
                    Title         = detailTitle,
                    Order         = spiderLog.Order
                };

                await mainDbContext.AddAsync(content);

                await mainDbContext.SaveChangesAsync();
            }

            Console.WriteLine("Process : " + spiderLog.ParentName + " - " + detailTitle);
        }
예제 #14
0
        /// <summary>
        /// Applies the posted batch action to the log entries chosen in the "choice" field.
        /// </summary>
        /// <remarks>
        /// Only the "delete" action does anything; any other action still answers with the
        /// AJAX OK response. An empty selection answers with an error instead.
        /// </remarks>
        public virtual void Admin()
        {
            String selectedIds     = ctx.PostIdList("choice");
            String requestedAction = ctx.Post("action");

            if (!strUtil.IsNullOrEmpty(selectedIds))
            {
                if ("delete".Equals(requestedAction))
                {
                    // NOTE(review): the id list is concatenated into the condition; presumably
                    // PostIdList only yields a validated list of ids — confirm.
                    SpiderLog.deleteBatch("Id in (" + selectedIds + ")");
                }

                echoAjaxOk();
            }
            else
            {
                echoError("请先选择");
            }
        }
예제 #15
0
        /// <summary>
        /// Scrolls the page, then queues a "SohuNewsDetail" task for every news box whose
        /// headline link matches <c>sohuNewsItemRegex</c>.
        /// </summary>
        /// <param name="spiderLog">The crawl task being processed (not read here).</param>
        /// <param name="webDriver">Driver positioned on the news listing page.</param>
        private async Task SetXinfanTask(SpiderLog spiderLog, IWebDriver webDriver)
        {
            webDriver.ScrollToEnd(10, 2);
            var newsBoxes = webDriver.FindElements(By.CssSelector(".news-wrapper .news-box"));
            var taskQueue = TaskExutor.Instance;

            foreach (var newsBox in newsBoxes)
            {
                var href = newsBox.FindValueByCss("h4 a@href");

                // Links that do not match the news pattern are ads and are skipped.
                if (sohuNewsItemRegex.IsMatch(href))
                {
                    await taskQueue.SetTask(href, "SohuNewsDetail");
                }
            }
        }
예제 #16
0
        /// <summary>
        /// Renders one page of spider log entries into the "list" block, together with the
        /// clear/operation URLs and the page bar.
        /// </summary>
        /// <remarks>
        /// Each row exposes the full message as <c>log.Msg</c> and a version cut via
        /// <c>strUtil.CutString(msg, 300)</c> as <c>log.MsgInfo</c>.
        /// </remarks>
        public virtual void List()
        {
            set("clearLogUrl", to(Clear));
            set("OperationUrl", to(Admin));

            DataPage <SpiderLog> page = SpiderLog.findPage("");
            IBlock rowBlock = getBlock("list");

            foreach (SpiderLog entry in page.Results)
            {
                var preview = strUtil.CutString(entry.Msg, 300);

                rowBlock.Set("log.Id", entry.Id);
                rowBlock.Set("log.Msg", entry.Msg);
                rowBlock.Set("log.MsgInfo", preview);
                rowBlock.Set("log.Created", entry.Created);
                rowBlock.Set("log.ViewUrl", to(Show, entry.Id));

                rowBlock.Next();
            }

            set("page", page.PageBar);
        }
예제 #17
0
        /// <summary>
        /// Runs one crawl pass over every spider template selected by "IsDelete=0" and then
        /// stores the collected error lines as a single <c>SpiderLog</c> row.
        /// </summary>
        /// <remarks>
        /// After each template the job sleeps for a random duration taken from
        /// <c>SpiderConfig.SuspendFrom</c>..<c>SpiderConfig.SuspendTo</c> (the original note
        /// describes this as a 2~6 second pause). A log row is inserted even when no error
        /// line was collected.
        /// </remarks>
        public void Execute()
        {
            List <SpiderTemplate> templates = SpiderTemplate.find("IsDelete=0").list();

            DbContext.closeConnectionAll();

            logger.Info("begin SpiderJob=" + templates.Count);

            StringBuilder crawlOutput = new StringBuilder();

            foreach (SpiderTemplate template in templates)
            {
                ISpiderTool tool = getSpider(template);
                int[] pauseRange = new int[] { SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo };

                tool.DownloadPage(template, crawlOutput, pauseRange);
                DbContext.closeConnectionAll();

                // Pause between templates.
                Thread.Sleep(rd.Next(SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo));
            }

            // Keep only the lines that start with "error=" once trimmed.
            StringBuilder errors = new StringBuilder();

            foreach (String rawLine in crawlOutput.ToString().Split('\n'))
            {
                String line = rawLine.Trim();
                if (line.StartsWith("error="))
                {
                    errors.AppendLine(line);
                }
            }

            SpiderLog record = new SpiderLog();

            record.Msg = errors.ToString();
            record.insert();
            DbContext.closeConnectionAll();
        }
예제 #18
0
        /// <summary>
        /// Handles a news detail page: makes sure the author has a user account, then stores
        /// the article as a new content entry unless one with the same title already exists.
        /// </summary>
        /// <remarks>
        /// A missing author account is created and saved before the article is looked up.
        /// The article is stored under the category named "资讯" together with its tags,
        /// its images and a <c>ContentEntryInfo</c> that points back to the source URL.
        /// </remarks>
        /// <param name="spiderLog">The crawl task; its TargetUrl is recorded as the article source.</param>
        /// <param name="webDriver">Driver positioned on the article page.</param>
        private async Task SetSohuNewsDetail(SpiderLog spiderLog, IWebDriver webDriver)
        {
            var db = GetDbContext();

            var authorPanel  = webDriver.FindElement(By.CssSelector(".column.left"));
            var articlePanel = webDriver.FindElement(By.CssSelector(".left.main"));

            var avatarUrl  = authorPanel.FindValueByCss(".user-pic img@src");
            var authorName = authorPanel.FindValueByCss(".user-info h4@text");

            var headline      = articlePanel.FindValueByCss(".text-title h1@text");
            var publishedText = articlePanel.FindValueByCss(".text-title .article-info .time@text");
            var tagNames      = articlePanel.FindMultiValueByCss(".text-title .article-info .tag@text");
            var bodyHtml      = articlePanel.FindValueByCss(".article@html");
            var imageUrls     = articlePanel.FindMultiValueByCss(".article img@src");

            // Find the author's account by nickname, creating it on first sight.
            var author = await db.Set <UserAccountEntry>().FirstOrDefaultAsync(x => x.NickName == authorName);

            if (author == null)
            {
                author = new UserAccountEntry()
                {
                    Id       = Guid.NewGuid(),
                    Avatar   = avatarUrl,
                    NickName = authorName
                };
                await db.AddAsync(author);

                await db.SaveChangesAsync();
            }

            // Articles are de-duplicated by title.
            var storedArticle = await db.ContentEntry.FirstOrDefaultAsync(x => x.Title == headline);

            if (storedArticle != null)
            {
                return;
            }

            var mediaFiles = new List <FileEntry>();
            if (imageUrls != null)
            {
                foreach (var imageUrl in imageUrls)
                {
                    mediaFiles.Add(new FileEntry
                    {
                        ActualPath = imageUrl,
                        CreateTime = DateTime.Now,
                        Name       = "tmppath",
                        Id         = Guid.NewGuid()
                    });
                }
            }

            var tags = new List <Tags>();
            if (tagNames != null)
            {
                foreach (var tagName in tagNames)
                {
                    tags.Add(new Tags
                    {
                        Id   = Guid.NewGuid(),
                        Name = tagName
                    });
                }
            }

            var newArticleId = Guid.NewGuid();
            var newsCategory = await db.Categories.FirstOrDefaultAsync(x => x.Name == "资讯");
            var publishedAt  = DateTime.Parse(publishedText);

            // The info record shares the article's id.
            var sourceInfo = new ContentEntryInfo
            {
                Author        = authorName,
                Source        = spiderLog.TargetUrl,
                Id            = newArticleId,
                Type          = "资讯",
                UserAccount   = author,
                UserAccountId = author.Id
            };

            var newArticle = new ContentEntry
            {
                Category         = newsCategory,
                Id               = newArticleId,
                Content          = bodyHtml,
                CreateTime       = publishedAt,
                MediaResource    = mediaFiles,
                Title            = headline,
                Tags             = tags,
                ContentEntryInfo = sourceInfo
            };
            await db.AddAsync(newArticle);

            await db.SaveChangesAsync();
        }
예제 #19
0
 /// <summary>
 /// Deletes spider log rows using the always-true condition "1=1", then redirects to the
 /// list page with the localized "opok" message.
 /// </summary>
 public virtual void ClearBegin()
 {
     // "1=1" holds for every row, so the whole log is cleared.
     const string everyRow = "1=1";
     SpiderLog.deleteBatch(everyRow);

     var confirmation = lang("opok");
     echoRedirect(confirmation, to(List));
 }
예제 #20
0
        /// <summary>
        /// Loads the spider log entry with the given id and binds it to the view as "log".
        /// </summary>
        /// <param name="id">Id of the log entry to display.</param>
        public virtual void Show(long id)
        {
            bind("log", SpiderLog.findById(id));
        }
예제 #21
0
 /// <summary>
 /// Marks the given task as updated on the spider database context and saves the change.
 /// </summary>
 /// <param name="spiderLog">The crawl task whose current state should be persisted.</param>
 /// <returns>A task that completes once the save has finished.</returns>
 public async Task UpdateTask(SpiderLog spiderLog)
 {
     spiderDbContext.Update(spiderLog);
     await spiderDbContext.SaveChangesAsync();
 }
예제 #22
0
        /// <summary>
        /// Loads the spider log entry with the given id and binds it to the view as "log".
        /// </summary>
        /// <param name="id">Id of the log entry to display.</param>
        public void Show(int id)
        {
            bind("log", SpiderLog.findById(id));
        }
예제 #23
0
        /// <summary>
        /// Runs one crawl pass driven by user records: for each of up to 1000 users (ordered
        /// by Hits, then id, descending) that has a profile address, a spider template is
        /// built from the user's fields and downloaded. The collected error lines are then
        /// stored as a single <c>SpiderLog</c> row.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the template is filled from user fields with unrelated names
        /// (QQ -> ListEncoding, MSN -> DetailPattern, Id -> IsDelete, ...); presumably user
        /// records double as crawl configuration here — confirm.
        /// After each user the job sleeps for a random duration taken from
        /// <c>SpiderConfig.SuspendFrom</c>..<c>SpiderConfig.SuspendTo</c> (the original note
        /// describes this as a 2~6 second pause).
        /// </remarks>
        public void Execute()
        {
            DbContext.closeConnectionAll();

            StringBuilder crawlOutput = new StringBuilder();

            IList rankedUsers = User.find("order by Hits desc, id desc").list(1000);

            logger.Info("begin SpiderJob=" + rankedUsers.Count);

            foreach (User owner in rankedUsers)
            {
                // Users without a profile address have nothing to crawl.
                if (string.IsNullOrEmpty(owner.Profile.Address))
                {
                    continue;
                }

                SpiderTemplate template = new SpiderTemplate
                {
                    ListUrl         = owner.Profile.Address,
                    ListEncoding    = owner.QQ,
                    ListBodyPattern = owner.Profile.Tel,
                    ListPattern     = owner.Profile.WebSite,
                    DetailPattern   = owner.MSN,
                    IsDelete        = owner.Id,
                    SiteName        = owner.Url
                };
                ISpiderTool tool = getSpider(template);
                int[] pauseRange = new int[] { SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo };

                tool.DownloadPage(template, crawlOutput, pauseRange);
                DbContext.closeConnectionAll();

                // Pause between users.
                Thread.Sleep(rd.Next(SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo));
            }

            // Keep only the lines that start with "error=" once trimmed.
            StringBuilder errors = new StringBuilder();

            foreach (String rawLine in crawlOutput.ToString().Split('\n'))
            {
                String line = rawLine.Trim();
                if (line.StartsWith("error="))
                {
                    errors.AppendLine(line);
                }
            }

            SpiderLog record = new SpiderLog();

            record.Msg = errors.ToString();
            record.insert();
            DbContext.closeConnectionAll();
        }
예제 #24
0
        /// <summary>
        /// Handles a category page: queues a "detail" task for every chapter link and creates
        /// the category (with tags and cover image) if no category of that name exists yet.
        /// </summary>
        /// <remarks>
        /// The displayed category name is split on spaces: the first token is the category
        /// name, and every token (including the first) becomes a tag, along with two fixed
        /// tags. Chapter links are queued from last to first, so the last link on the page
        /// gets order 1. Finally the task's ParentName is set to the full displayed name.
        /// </remarks>
        /// <param name="spiderLog">The crawl task; its TargetUrl is passed on as parent URL.</param>
        /// <param name="driver">Driver positioned on the category page.</param>
        /// <returns>A task that completes once all follow-up work has been queued and saved.</returns>
        static async Task SetCategoryTask(SpiderLog spiderLog, IWebDriver driver)
        {
            IWebElement webElement          = driver.FindElement(By.TagName("html"));
            var         detailAList         = webElement.FindElements(By.CssSelector(".d_menu a"));
            var         categoryName        = webElement.FindElement(By.CssSelector(".d_bg_t")).GetAttribute("innerText");
            var         categoryCoverUrl    = webElement.FindElement(By.CssSelector(".fl.d_bgi_href img")).GetAttribute("src");
            var         categoryDescription = webElement.FindElement(By.CssSelector(".d_bg_ce")).GetAttribute("innerText");

            // The ".Disclaimer_tit" lookup was dropped: its result was never used, yet a
            // page without that element made the whole task fail.

            var mainDbContext = GetDbContext();

            // Tags may be shown as part of the name, separated by spaces.
            var nameArr  = categoryName.Split(' ');
            var cateName = nameArr[0];

            for (int index = detailAList.Count - 1; index >= 0; index--)
            {
                var currentItem = detailAList[index];
                await TaskExutor.Instance.SetTaskWithParentUrl(currentItem.GetAttribute("href"), spiderLog.TargetUrl, cateName, detailAList.Count - index, "detail");
            }

            var exitedCategory = await mainDbContext.Categories.AnyAsync(x => x.Name == cateName);

            if (!exitedCategory)
            {
                // Tags are only needed for a new category, so build them here.
                var tagList = new List <Tags>();

                foreach (var item in nameArr)
                {
                    tagList.Add(new Tags
                    {
                        Id   = Guid.NewGuid(),
                        Name = item
                    });
                }

                tagList.Add(new Tags
                {
                    Id   = Guid.NewGuid(),
                    Name = "咕嘛"
                });

                tagList.Add(new Tags
                {
                    Id   = Guid.NewGuid(),
                    Name = "恋爱"
                });

                var topcategory = await mainDbContext.Categories.FirstOrDefaultAsync(x => x.Name == "漫画");

                // One timestamp so the category and its cover agree.
                var createdAt = DateTime.Now;

                var category = new Categories
                {
                    Id             = Guid.NewGuid(),
                    Name           = cateName,
                    Description    = categoryDescription,
                    CreateTime     = createdAt,
                    Tags           = tagList,
                    ParentCategory = topcategory,
                    MediaResource  = new List <FileEntry> {
                        new FileEntry {
                            // Explicit id, as for every other FileEntry the crawler creates.
                            Id         = Guid.NewGuid(),
                            ActualPath = categoryCoverUrl,
                            CreateTime = createdAt,
                            Name       = "tmpname"
                        }
                    }
                };

                await mainDbContext.Categories.AddAsync(category);

                await mainDbContext.SaveChangesAsync();
            }

            spiderLog.ParentName = categoryName;
            await TaskExutor.Instance.UpdateTask(spiderLog);

            Console.WriteLine("Process: " + categoryName);
        }
예제 #25
0
 /// <summary>
 /// Reads the <c>src</c> of the <c>#imgString img</c> element on the current page.
 /// The value is not used further; the method only fails if the element is missing.
 /// </summary>
 /// <param name="spiderLog">The crawl task being processed (not read here).</param>
 /// <param name="webDriver">Driver positioned on the detail page.</param>
 private async Task YaoqimanhuaDetail(SpiderLog spiderLog, IWebDriver webDriver)
 {
     var imageElement = webDriver.FindElement(By.CssSelector("#imgString img"));
     var imageSource  = imageElement.GetAttribute("src");
 }
예제 #26
0
        /// <summary>
        /// Runs one crawl pass driven by user records: for each of up to 1000 users (ordered
        /// by Hits, then id, descending) that has a profile address, a spider template is
        /// built from the user's fields and downloaded. The collected error lines are then
        /// stored as a single <c>SpiderLog</c> row.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the template is filled from user fields with unrelated names
        /// (QQ -> ListEncoding, MSN -> DetailPattern, Id -> IsDelete, ...); presumably user
        /// records double as crawl configuration here — confirm.
        /// After each user the job sleeps for a random duration taken from
        /// <c>SpiderConfig.SuspendFrom</c>..<c>SpiderConfig.SuspendTo</c> (the original note
        /// describes this as a 2~6 second pause).
        /// </remarks>
        public void Execute()
        {
            DbContext.closeConnectionAll();

            StringBuilder crawlOutput = new StringBuilder();

            IList rankedUsers = User.find("order by Hits desc, id desc").list(1000);
            logger.Info("begin SpiderJob=" + rankedUsers.Count);

            foreach (User owner in rankedUsers)
            {
                // Users without a profile address have nothing to crawl.
                if (string.IsNullOrEmpty(owner.Profile.Address))
                {
                    continue;
                }

                SpiderTemplate template = new SpiderTemplate
                {
                    ListUrl = owner.Profile.Address,
                    ListEncoding = owner.QQ,
                    ListBodyPattern = owner.Profile.Tel,
                    ListPattern = owner.Profile.WebSite,
                    DetailPattern = owner.MSN,
                    IsDelete = owner.Id,
                    SiteName = owner.Url
                };
                ISpiderTool tool = getSpider(template);
                int[] pauseRange = new int[] { SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo };

                tool.DownloadPage(template, crawlOutput, pauseRange);
                DbContext.closeConnectionAll();

                // Pause between users.
                Thread.Sleep(rd.Next(SpiderConfig.SuspendFrom, SpiderConfig.SuspendTo));
            }

            // Keep only the lines that start with "error=" once trimmed.
            StringBuilder errors = new StringBuilder();
            foreach (String rawLine in crawlOutput.ToString().Split( '\n' )) {
                String line = rawLine.Trim();
                if (line.StartsWith( "error=" )) {
                    errors.AppendLine( line );
                }
            }

            SpiderLog record = new SpiderLog();
            record.Msg = errors.ToString();
            record.insert();
            DbContext.closeConnectionAll();
        }