Ejemplo n.º 1
0
        /// <summary>
        /// Runs a page's HTML through the site-level filter chain described by <paramref name="filter"/>.
        /// </summary>
        /// <param name="html">The HTML text to filter.</param>
        /// <param name="filter">
        /// Filter definition handed to <c>LoadFilter</c>. When it is null or empty no chain is
        /// loaded and <paramref name="html"/> is returned as-is.
        /// </param>
        /// <returns>
        /// The result of the chain's <c>DoFilter</c> when the loaded chain reports a count above zero;
        /// otherwise <paramref name="html"/> unchanged.
        /// </returns>
        private static string FilterHtml(string html, string filter)
        {
            // No filter configured: skip loading a chain altogether.
            if (string.IsNullOrEmpty(filter))
            {
                return html;
            }

            FilterChain siteChain = LoadFilter(filter);

            // A chain with no entries has nothing to apply, so the input passes straight through.
            return siteChain.Count() > 0 ? siteChain.DoFilter(html) : html;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads an image page and lazily yields one <c>ImageModel</c> per image link found on it.
        /// </summary>
        /// <remarks>
        /// This is an iterator method: the download, filtering and parsing only happen once the
        /// returned sequence is enumerated, and they happen again on every new enumeration.
        /// </remarks>
        /// <param name="sex">
        /// Spider configuration. This method reads its <c>ImgType</c>, <c>PageEncode</c>, <c>Domain</c>,
        /// <c>SiteFilter</c>, <c>SiteReplace</c>, <c>ImageFilter</c> and <c>ImageDiv</c> members.
        /// </param>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// A sequence of <c>ImageModel</c> items, one for each element matched by <c>ImageDiv</c>
        /// that produced a non-empty link; elements without a link are skipped.
        /// </returns>
        public static IEnumerable <ImageModel> GetListImage(SexSpiders sex, string url)
        {
            // Pages whose ImgType contains "ajax" are fetched via GetJSContent; everything else
            // (including a null ImgType) goes through GetHtmlContent.
            string pageHtml;
            if (sex.ImgType == null || !sex.ImgType.Contains("ajax"))
            {
                pageHtml = GetHtmlContent(url, sex.PageEncode, sex.Domain);
            }
            else
            {
                pageHtml = GetJSContent(url, sex.PageEncode);
            }

            // Site-level clean-up: apply the site filter first, then the site replacement rules.
            pageHtml = ReplaceHtml(FilterHtml(pageHtml, sex.SiteFilter), sex.SiteReplace);

            FilterChain imageChain = LoadFilter(sex.ImageFilter);

            var document = new HtmlParser().Parse(pageHtml);

            foreach (var element in document.QuerySelectorAll(sex.ImageDiv))
            {
                // With an image filter configured, the link is extracted from the element's markup;
                // otherwise fall back to its "src" attribute, then "href".
                string rawLink = imageChain.Count() > 0
                    ? imageChain.DoFilter(element.OuterHtml)
                    : (element.GetAttribute("src") ?? element.GetAttribute("href"));

                // Elements that yield no link are skipped.
                if (!string.IsNullOrEmpty(rawLink))
                {
                    string imageUrl = GetLink(rawLink, sex.Domain);

                    yield return new ImageModel
                    {
                        ImageUrl = imageUrl,
                        ImageDomain = sex.Domain
                    };
                }
            }
        }