Exemplo n.º 1
0
        /// <summary>
        /// Downloads the content at the catch item's URI as a string, issuing the
        /// request through a <c>WebCrawlerHttpClientHandler</c> built from the item's
        /// cookie and proxy.
        /// </summary>
        /// <param name="catchItem">Item describing the request; may be null.</param>
        /// <returns>
        /// The response body, or an empty string when <paramref name="catchItem"/> is
        /// null or the request fails. Failures are logged at debug level and never
        /// propagated to the caller.
        /// </returns>
        public async Task <string> RequestHTML(ICatchItem catchItem)
        {
            if (catchItem is null)
            {
                return string.Empty;
            }

            Uri    address = catchItem.Uri;
            Uri    proxy   = catchItem.Proxy;
            string cookie  = catchItem.Cookie;

            try
            {
                var webCrawlerHttpClientHandler = new WebCrawlerHttpClientHandler(cookie, proxy);

                // The client takes ownership of the handler, so disposing the client
                // releases both once the request has completed.
                using (var httpClient = new HttpClient(webCrawlerHttpClientHandler))
                {
                    return await httpClient.GetStringAsync(address);
                }
            }
            catch (Exception ex)
            {
                // address can be null here (a null URI is one of the ways the request
                // fails), so it must not be dereferenced unconditionally while logging.
                logger.DebugFormat("Error RequestHTML address:{0}: {1}", address?.AbsoluteUri, ex.Message);
            }

            return string.Empty;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Parses a work-list page and records every gallery entry found on it as a
        /// <c>Work</c> together with an <c>Author</c>.
        /// </summary>
        /// <param name="response">HTML text of the fetched page.</param>
        /// <param name="catchItem">
        /// The request that produced <paramref name="response"/>; its URI supplies the
        /// scheme and host used to resolve addresses found in the page.
        /// </param>
        /// <remarks>
        /// Any exception raised while parsing is logged and swallowed.
        /// </remarks>
        protected override void WebResponseHandle(string response, ICatchItem catchItem)
        {
            try
            {
                string requestDomain = string.Format("{0}://{1}", catchItem.Uri.Scheme, catchItem.Uri.Host);

                var doc = new HtmlDocument();
                doc.LoadHtml(response);

                IPage page = RuleConfig.PageRule.GetRule(PageType.AllWork);

                var galleryNodes = page.GetNodes(doc.DocumentNode, "Gallery");
                if (galleryNodes == null)
                {
                    return;
                }

                logger.InfoFormat("获取作品列表: 个数:{0}", galleryNodes.Count);

                foreach (var item in galleryNodes)
                {
                    Work work = new Work();

                    string workAddress = page.GetSingleNodeValue(item, "Gallery.Address");
                    work.Address       = StringUtil.FillWithDomain(workAddress, requestDomain)?.AbsoluteUri;
                    work.Name          = page.GetSingleNodeValue(item, "Gallery.Name");

                    Author author = new Author();

                    // Resolve the author link against the request domain exactly like the
                    // work link above, so both are stored in the same (absolute) form.
                    string authorAddress = page.GetSingleNodeValue(item, "Gallery.Author.Address");
                    author.Address       = StringUtil.FillWithDomain(authorAddress, requestDomain)?.AbsoluteUri;

                    // NOTE(review): the author name is copied from the work name; confirm
                    // this is intended rather than a dedicated author-name rule.
                    author.Name = work.Name;

                    authorHelper.Add(author);
                    workHelper.Add(work);

                    // Originality and Role are not populated by this method; they are
                    // logged with whatever value a new Work carries.
                    logger.InfoFormat("获得作品: [Address:{0}], [Originality:{1}], [Role:{2}], [Name:{3}]", work.Address, work.Originality, work.Role, work.Name);
                }
            }
            catch (Exception ex)
            {
                logger.ErrorFormat("WorkCatcher WebResponseHandle:{0}", ex);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Extracts proxies from a fetched page and stores each one that passes
        /// verification.
        /// </summary>
        /// <param name="response">Text of the fetched page.</param>
        /// <param name="catchItem">
        /// The request that produced <paramref name="response"/>; its <c>Extend</c>
        /// value is cast to the string used to look up the proxy handle.
        /// </param>
        /// <remarks>
        /// Returns without doing anything when no handle is found or the handle yields
        /// no list. Any exception raised here is logged and swallowed.
        /// </remarks>
        protected override void WebResponseHandle(string response, ICatchItem catchItem)
        {
            try
            {
                IProxyHandle parser = proxyFactory.GetProxyHandle((string)catchItem.Extend);

                // A missing handle and a missing result both end the method early.
                List <Proxy> candidates = parser?.DoHandle(response);
                if (candidates == null)
                {
                    return;
                }

                foreach (Proxy candidate in candidates)
                {
                    httpUtil.VerifyProxy(candidate.Address, verified =>
                    {
                        if (!verified)
                        {
                            return;
                        }

                        proxyHelper.Add(candidate);
                        logger.InfoFormat("添加代理:{0}", candidate.Address.AbsoluteUri);
                    });
                }
            }
            catch (Exception ex)
            {
                logger.ErrorFormat("Error ProxyHandle:{0}", ex);
            }
        }
        /// <summary>
        /// Parses a work-list page: determines the next page to crawl from the pager
        /// and records every gallery entry as a <c>Work</c> together with an
        /// <c>Author</c>.
        /// </summary>
        /// <param name="response">HTML text of the fetched page.</param>
        /// <param name="catchItem">
        /// The request that produced <paramref name="response"/>; its URI supplies the
        /// scheme and host used to resolve addresses found in the page.
        /// </param>
        /// <remarks>
        /// <c>nextWorkPage</c> is set to null when the page has no usable pager link.
        /// Any exception raised while parsing is logged and swallowed.
        /// </remarks>
        protected override void WebResponseHandle(string response, ICatchItem catchItem)
        {
            try
            {
                string requestDomain = string.Format("{0}://{1}", catchItem.Uri.Scheme, catchItem.Uri.Host);

                var doc = new HtmlDocument();
                doc.LoadHtml(response);

                IPage page = RuleConfig.PageRule.GetRule(PageType.AllWork);

                // SelectNodes yields null when nothing matches, and a page may carry
                // fewer than two pager items. Neither case may abort the gallery
                // parsing below, so the pager lookup is fully guarded.
                var    pagerNodes = doc.DocumentNode.SelectNodes("//li[@class='pager__item js-nologinLink']");
                string href       = null;

                if (pagerNodes != null && pagerNodes.Count >= 2)
                {
                    // The second-to-last pager item is treated as the "next page" button.
                    var nextPageNode = pagerNodes[pagerNodes.Count - 2].SelectSingleNode("./a");

                    // The anchor may be missing, or present without an href attribute.
                    href = nextPageNode?.Attributes["href"]?.Value;
                }

                if (string.IsNullOrEmpty(href))
                {
                    nextWorkPage = null;
                }
                else
                {
                    nextWorkPage = new AllWorkPage(StringUtil.FillWithDomain(href, requestDomain));
                }

                var galleryNodes = page.GetNodes(doc.DocumentNode, "Gallery");
                if (galleryNodes == null)
                {
                    return;
                }

                logger.InfoFormat("获取作品列表: 个数:{0}", galleryNodes.Count);

                foreach (var item in galleryNodes)
                {
                    Work work = new Work();

                    string workAddress = page.GetSingleNodeValue(item, "Gallery.Address");
                    work.Address       = StringUtil.FillWithDomain(workAddress, requestDomain)?.AbsoluteUri;
                    work.Name          = page.GetSingleNodeValue(item, "Gallery.Name");

                    Author author = new Author();

                    string authorAddress = page.GetSingleNodeValue(item, "Gallery.Author.Address");
                    author.Address       = StringUtil.FillWithDomain(authorAddress, requestDomain)?.AbsoluteUri;

                    // NOTE(review): the author name is copied from the work name; confirm
                    // this is intended rather than a dedicated author-name rule.
                    author.Name = work.Name;

                    authorHelper.Add(author);
                    workHelper.Add(work);

                    // Originality and Role are not populated by this method; they are
                    // logged with whatever value a new Work carries.
                    logger.InfoFormat("获得作品: [Address:{0}], [Originality:{1}], [Role:{2}], [Name:{3}]", work.Address, work.Originality, work.Role, work.Name);
                }
            }
            catch (Exception ex)
            {
                logger.ErrorFormat("WorkCatcher WebResponseHandle:{0}", ex);
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Processes the response obtained for a catch item. This implementation is
 /// intentionally empty; it exists as an extension point for derived classes
 /// to override.
 /// </summary>
 /// <param name="response">The response content, passed as a string.</param>
 /// <param name="catchItem">The catch item associated with the response.</param>
 protected virtual void WebResponseHandle(string response, ICatchItem catchItem)
 {
 }
Exemplo n.º 6
0
        /// <summary>
        /// Parses a work page, records every image found on it as a
        /// <c>WorkImage</c> of the work carried in <c>catchItem.Extend</c>, and then
        /// marks that work as crawled.
        /// </summary>
        /// <param name="response">HTML text of the fetched page.</param>
        /// <param name="catchItem">
        /// The request that produced <paramref name="response"/>. Images are only
        /// recorded when its <c>Extend</c> value is a <c>Work</c>.
        /// </param>
        /// <remarks>
        /// The page title is stripped of characters that are invalid in paths before
        /// being stored on the work. Image nodes without an address are skipped.
        /// Any exception raised while parsing is logged and swallowed.
        /// </remarks>
        protected override void WebResponseHandle(string response, ICatchItem catchItem)
        {
            try
            {
                var doc = new HtmlDocument();
                doc.LoadHtml(response);

                IPage page = RuleConfig.PageRule.GetRule(PageType.Images);

                string coserName = page.GetSingleNodeValue(doc.DocumentNode, "CoserName");
                string title     = page.GetSingleNodeValue(doc.DocumentNode, "Title");

                if (!string.IsNullOrEmpty(title))
                {
                    foreach (char invalidChar in System.IO.Path.GetInvalidPathChars())
                    {
                        title = title.Replace(invalidChar.ToString(), string.Empty);
                    }

                    // Additional characters stripped on top of the invalid path characters.
                    string errChar = "\\/:*?";
                    foreach (char invalidChar in errChar)
                    {
                        title = title.Replace(invalidChar.ToString(), string.Empty);
                    }
                }

                var imgNodes = page.GetNodes(doc.DocumentNode, "WorkImage");
                if (imgNodes != null && catchItem.Extend is Work work)
                {
                    string workAddress = catchItem.Uri.AbsoluteUri;

                    // Matches image URLs containing a "w<digits>" size segment; built once
                    // instead of once per image.
                    var sizeSuffixRegex = new Regex(@"((http|https)://)(([a-zA-Z0-9\._-]+)/)+(w\d+)");

                    foreach (var imgNode in imgNodes)
                    {
                        string address = page.GetSingleNodeValue(imgNode, "WorkImage.Src");

                        // A node without an address would make the regex call throw and
                        // abort every remaining image; skip just this node instead.
                        if (string.IsNullOrEmpty(address))
                        {
                            logger.WarnFormat("WorkImageCatcher skipped image without address: [WorkAddress:{0}]", workAddress);
                            continue;
                        }

                        // Remove the trailing size qualifier.
                        if (sizeSuffixRegex.IsMatch(address))
                        {
                            address = address.Substring(0, address.LastIndexOf('/'));
                        }

                        logger.InfoFormat("添加图片:[Address:{0}, CoserName:{1}, WorkAddress:{2}]", address, coserName, workAddress);

                        WorkImage workImage = new WorkImage()
                        {
                            Work = work, WorkId = work.Id, Address = address
                        };
                        workImageHelper.Add(workImage);

                        // Notify subscribers that the image has been added.
                        syncWorkImageAdd?.Invoke(workImage);
                    }

                    logger.InfoFormat("完成抓取: {0}", catchItem.Uri.AbsoluteUri);

                    // Mark the work as having had its images crawled.
                    work.IsCatchImage = true;
                    work.Title        = title;
                }
            }
            catch (Exception ex)
            {
                logger.ErrorFormat("WorkImageCatcher WebResponseHandle:{0}", ex);
            }
        }