/// <summary>
/// Transcodes the page at <paramref name="url"/> with NReadability and returns
/// the plain text of the <c>div</c> whose id is <c>readInner</c>.
/// </summary>
/// <param name="url">Address of the page to transcode.</param>
/// <returns>
/// The inner text of the <c>readInner</c> element, or an empty string when the
/// transcoder reports failure or the element is not present in its output.
/// </returns>
private string getcontent(string url)
{
    var transcoder = new NReadability.NReadabilityWebTranscoder();

    bool succeeded;
    string page = transcoder.Transcode(url, out succeeded);
    if (!succeeded)
    {
        return string.Empty;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(page);

    // SelectSingleNode yields null when nothing matches the XPath; fall back to
    // the same empty result used for a failed transcode instead of throwing.
    var readInner = doc.DocumentNode.SelectSingleNode("//div[@id='readInner']");
    if (readInner == null)
    {
        return string.Empty;
    }

    return readInner.InnerText;
}
/// <summary>
/// Transcodes the page at <paramref name="uri"/> with NReadability, strips
/// attributes and styling from the extracted markup via the rm* helpers, and
/// saves the result as an HTML file.
/// </summary>
/// <param name="uri">Location of the page; its <c>OriginalString</c> is handed to the transcoder.</param>
/// <param name="IncludeImage">
/// When true the markup is passed through <c>SaveImage</c> before saving;
/// otherwise it is passed through <c>rmImgNode</c>.
/// </param>
/// <param name="basePath">Forwarded unchanged to <c>SaveImage</c> and <c>SaveAsHtml</c>.</param>
/// <returns>
/// An <c>Article</c> whose <c>Title</c> is the extracted title and whose
/// <c>FileName</c> is the value returned by <c>SaveAsHtml</c>.
/// </returns>
public Article GetArticle(Uri uri, bool IncludeImage, string basePath)
{
    var webTranscoder = new NReadability.NReadabilityWebTranscoder();

    // Turn off every optional meta element in the serialized output.
    var serialization = new NReadability.DomSerializationParams
    {
        DontIncludeContentTypeMetaElement = true,
        DontIncludeDocTypeMetaElement = true,
        DontIncludeGeneratorMetaElement = true,
        DontIncludeMobileSpecificMetaElements = true
    };

    var request = new NReadability.WebTranscodingInput(uri.OriginalString)
    {
        DomSerializationParams = serialization
    };

    var result = webTranscoder.Transcode(request);
    var articleTitle = result.ExtractedTitle;

    // Clean the extracted markup in place on the parsed DOM.
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(result.ExtractedContent);
    var root = document.DocumentNode;
    rmHerfAtt(root);
    rmClassAtt(root);
    rmAllAttFromDiv(root);

    var article = new Article { Title = articleTitle };

    string cleaned = rmStyle(root.OuterHtml);

    // Either process images via SaveImage or remove the image nodes, then save once.
    string finalHtml = IncludeImage
        ? SaveImage(articleTitle, cleaned, basePath)
        : rmImgNode(cleaned);
    article.FileName = SaveAsHtml(articleTitle, finalHtml, basePath);

    return article;
}