Esempio n. 1
0
        /// <summary>
        /// Transcodes the page at <paramref name="url"/> with NReadability and returns the
        /// text of the <c>div</c> whose id is <c>readInner</c> in the transcoded markup.
        /// </summary>
        /// <param name="url">Address of the page handed to the NReadability web transcoder.</param>
        /// <returns>
        /// The inner text of the <c>readInner</c> element, or an empty string when the
        /// transcoder's out flag is false, the transcoded markup is empty, or the element
        /// is not present.
        /// </returns>
        private string getcontent(string url)
        {
            var transcoder = new NReadability.NReadabilityWebTranscoder();
            bool extracted;
            string page = transcoder.Transcode(url, out extracted);

            // NOTE(review): the out flag presumably means "main content was extracted" - confirm
            // against the NReadability API. Nothing useful can be parsed without it.
            if (!extracted || string.IsNullOrEmpty(page))
            {
                return "";
            }

            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(page);

            // SelectSingleNode yields null when the XPath matches nothing, so the node must be
            // checked before reading InnerText to avoid a NullReferenceException.
            HtmlAgilityPack.HtmlNode readable = doc.DocumentNode.SelectSingleNode("//div[@id='readInner']");
            return readable == null ? "" : readable.InnerText;
        }
Esempio n. 2
0
        /// <summary>
        /// Transcodes the page at <paramref name="uri"/> with NReadability, cleans the extracted
        /// markup with this class's <c>rm*</c> helpers, saves the result through
        /// <c>SaveAsHtml</c>, and returns an <see cref="Article"/> describing it.
        /// </summary>
        /// <param name="uri">Location of the page; its <c>OriginalString</c> is passed to the transcoder.</param>
        /// <param name="IncludeImage">
        /// When true the markup is passed through <c>SaveImage</c> before saving; when false
        /// it is passed through <c>rmImgNode</c> instead.
        /// </param>
        /// <param name="basePath">Forwarded unchanged to <c>SaveImage</c> and <c>SaveAsHtml</c>.</param>
        /// <returns>
        /// An <see cref="Article"/> whose <c>Title</c> is the extracted title and whose
        /// <c>FileName</c> is the value returned by <c>SaveAsHtml</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
        public Article GetArticle(Uri uri, bool IncludeImage, string basePath)
        {
            // Fail fast with a descriptive exception rather than a NullReferenceException
            // from uri.OriginalString further down.
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }

            // Suppress the extra meta elements the serializer would otherwise emit.
            var serialization = new NReadability.DomSerializationParams();
            serialization.DontIncludeContentTypeMetaElement     = true;
            serialization.DontIncludeDocTypeMetaElement         = true;
            serialization.DontIncludeGeneratorMetaElement       = true;
            serialization.DontIncludeMobileSpecificMetaElements = true;

            var input = new NReadability.WebTranscodingInput(uri.OriginalString);
            input.DomSerializationParams = serialization;

            var transcoder = new NReadability.NReadabilityWebTranscoder();
            var output     = transcoder.Transcode(input);
            var title      = output.ExtractedTitle;

            // Re-parse the extracted content so the attribute-stripping helpers can work on nodes.
            // NOTE(review): judging by their names these remove href attributes, class attributes
            // and all attributes on div elements - confirm against the helper implementations.
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(output.ExtractedContent);
            rmHerfAtt(doc.DocumentNode);
            rmClassAtt(doc.DocumentNode);
            rmAllAttFromDiv(doc.DocumentNode);

            Article art = new Article();
            art.Title = title;

            string cleaned = rmStyle(doc.DocumentNode.OuterHtml);

            // Only the image step differs between the two modes; the save step is shared.
            string html = IncludeImage
                ? SaveImage(title, cleaned, basePath)
                : rmImgNode(cleaned);

            art.FileName = SaveAsHtml(title, html, basePath);
            return art;
        }