コード例 #1
0
        //get img urls
        /// <summary>
        /// Collects the URL of every <c>img</c> element found inside the article content container.
        /// </summary>
        /// <param name="document">Parsed HTML document to search.</param>
        /// <param name="config">Crawler configuration; its <c>ContentSelector</c> locates the content container.</param>
        /// <returns>
        /// The image URLs joined with commas (in document order), or <c>null</c> when no content
        /// selector is configured, the content container is not found, or it contains no images.
        /// </returns>
        private string GetImageUrls(HtmlDocument document, PreviewArticleBindingModel config)
        {
            // Without a selector there is nothing to query; bail out instead of passing null/empty to QuerySelector.
            if (string.IsNullOrEmpty(config.ContentSelector))
            {
                return null;
            }

            var contentElement = document.DocumentNode.QuerySelector(config.ContentSelector);
            if (contentElement == null)
            {
                return null;
            }

            var matches = contentElement.QuerySelectorAll("img");
            if (matches == null)
            {
                return null;
            }

            // Materialize once so the query is not re-run for the emptiness check and again for the join.
            var imgEls = matches.ToList();
            if (imgEls.Count == 0)
            {
                return null;
            }

            // string.Join inserts separators only between items, so no trailing comma needs trimming.
            return string.Join(",", imgEls.Select(imgEl => GetImgUrl(imgEl)));
        }
コード例 #2
0
        /// <summary>
        /// Loads the page at <c>model.Link</c> and extracts a preview (title, description and content)
        /// using the selectors carried by <paramref name="model"/>.
        /// </summary>
        /// <param name="model">The link to load plus the selectors used to locate each part of the article.</param>
        /// <returns>
        /// 400 when the model is missing, fails validation, or has no link; 500 when the HTML document
        /// cannot be loaded; otherwise a JSON-serialized <c>PreviewArticleViewModel</c>.
        /// </returns>
        public IHttpActionResult PreviewArticle(PreviewArticleBindingModel model)
        {
            Debug.WriteLine("Hello preview article");

            // NOTE(review): ModelState is expected to stay valid when nothing was bound (e.g. empty body),
            // so a null model has to be rejected explicitly before it is dereferenced below.
            if (model == null)
            {
                return BadRequest("request body is required");
            }

            if (!ModelState.IsValid)
            {
                return BadRequest(ModelState);
            }

            var url = model.Link;

            // A missing link is a client error; report it as such rather than letting the loader fail with a 500.
            if (url == null)
            {
                return BadRequest("link is required");
            }

            HtmlDocument document;
            try
            {
                document = _htmlWeb.Load(url);
            }
            catch (Exception err)
            {
                // Any load failure (network, bad URL, parse error) is reported to the caller as a generic 500.
                Console.WriteLine("LOAD HTML DOC FAILED: " + err.Message);
                return Content(HttpStatusCode.InternalServerError, "can not get html document");
            }

            // Each extractor returns null when its selector is unset or matches nothing.
            var title       = GetTitle(document, model);
            var description = GetDescription(document, model);
            var content     = GetContent(document, model);

            var viewModel = new PreviewArticleViewModel()
            {
                Content     = content,
                Description = description,
                Title       = title
            };

            return Json(viewModel);
        }
コード例 #3
0
        /// <summary>
        /// Produces the article body HTML from the node matched by <c>config.ContentSelector</c>.
        /// </summary>
        /// <remarks>
        /// This method mutates <paramref name="document"/>: nodes matching <c>config.RemovalSelector</c>
        /// are removed, direct-child <c>p</c> elements have <c>RemoveClass()</c> applied and the
        /// <c>article-paragraph</c> class added, and every direct child containing an <c>img</c> is
        /// handed to <c>GenerateHtmlForImg</c>.
        /// </remarks>
        /// <param name="document">Parsed HTML document to read from and clean up.</param>
        /// <param name="config">Crawler configuration supplying the content and removal selectors.</param>
        /// <returns>
        /// The inner HTML of the content container after processing, or <c>null</c> when no content
        /// selector is configured or the container is not found.
        /// </returns>
        private string GetContent(HtmlDocument document, PreviewArticleBindingModel config)
        {
            if (config.ContentSelector == null)
            {
                return null;
            }

            // Strip unwanted nodes first so they never show up in the extracted content.
            if (!string.IsNullOrEmpty(config.RemovalSelector))
            {
                // Snapshot the matches: removing nodes while the query is still being enumerated is unsafe.
                var unwantedNodes = document.DocumentNode.QuerySelectorAll(config.RemovalSelector).ToList();
                foreach (var unwanted in unwantedNodes)
                {
                    unwanted.Remove();
                }
            }

            var container = document.DocumentNode.QuerySelector(config.ContentSelector);
            if (container == null)
            {
                return null;
            }

            // Iterate over a copy of the child list because the image handling below may alter the tree.
            foreach (var child in container.ChildNodes.ToList())
            {
                // Text nodes carry no markup to rewrite.
                if (child.Name == "#text")
                {
                    continue;
                }

                // Normalize paragraph styling.
                if (child.Name == "p")
                {
                    child.RemoveClass();
                    child.AddClass("article-paragraph");
                }

                // Only children that wrap an image need their markup regenerated.
                var img = child.QuerySelector("img");
                if (img == null)
                {
                    continue;
                }

                var source  = GetImgUrl(img);
                var caption = GetImgCaption(child);
                GenerateHtmlForImg(source, caption, child);
            }

            return container.InnerHtml;
        }
コード例 #4
0
        //get article description
        /// <summary>
        /// Reads the article description from the first node matching <c>config.DescriptionSelector</c>.
        /// </summary>
        /// <param name="document">Parsed HTML document to search.</param>
        /// <param name="config">Crawler configuration supplying the description selector.</param>
        /// <returns>
        /// The trimmed inner text of the matched node, or <c>null</c> when the selector is not
        /// configured or matches nothing.
        /// </returns>
        private string GetDescription(HtmlDocument document, PreviewArticleBindingModel config)
        {
            var selector = config.DescriptionSelector;
            if (selector == null)
            {
                return null;
            }

            var match = document.DocumentNode.QuerySelector(selector);

            return match == null ? null : match.InnerText.Trim();
        }
コード例 #5
0
        //for content crawler
        /// <summary>
        /// Reads the article title from the first node matching <c>config.TitleSelector</c>.
        /// </summary>
        /// <param name="document">Parsed HTML document to search.</param>
        /// <param name="config">Crawler configuration supplying the title selector.</param>
        /// <returns>
        /// The trimmed inner text of the matched node, or <c>null</c> when the selector is not
        /// configured or matches nothing.
        /// </returns>
        private string GetTitle(HtmlDocument document, PreviewArticleBindingModel config)
        {
            var selector = config.TitleSelector;
            if (selector == null)
            {
                return null;
            }

            var match = document.DocumentNode.QuerySelector(selector);

            return match == null ? null : match.InnerText.Trim();
        }