Exemplo n.º 1
0
        /// <summary>
        /// Fetches <paramref name="url"/> into <paramref name="targetDir"/> when <c>IsFileNeeded</c>
        /// reports that the file is needed. Targets whose extension is ".html" are passed through
        /// NReadability and only the extracted content is written; everything else is saved as-is.
        /// </summary>
        /// <param name="url">Address of the resource to fetch.</param>
        /// <param name="targetDir">Directory the file is written into.</param>
        /// <param name="targetFname">Requested file name; <c>RemoveColon()</c> is applied before use.</param>
        public void Download(string url, string targetDir, string targetFname)
        {
            string cleanName = targetFname.RemoveColon();

            // Trim the combined path so it respects the course's maximum path-part length.
            string destination = Utilities.TrimPathPart(
                Path.Combine(targetDir, cleanName),
                _futureleanCourse.Max_path_part_len);

            // Headers are taken from the shared client as it stands when this method is called.
            int expectedLength = GetContentLength(_futureleanCourse._client.ResponseHeaders);

            if (!IsFileNeeded(destination, expectedLength, cleanName))
            {
                return;
            }

            if (Path.GetExtension(destination) != ".html")
            {
                _futureleanCourse._client.DownloadFile(url, destination);
                return;
            }

            // HTML pages: keep only the readable part of the document.
            string rawHtml = _futureleanCourse._client.DownloadString(url);
            var readability = new NReadabilityTranscoder();
            TranscodingResult extraction = readability.Transcode(new TranscodingInput(rawHtml));
            File.WriteAllText(destination, extraction.ExtractedContent);
        }
        /// <summary>
        /// Verifies that an image-source transformer assigned to the transcoder rewrites the
        /// <c>src</c> attribute and that the original value is kept under <c>origsrc</c>.
        /// </summary>
        public void TestImageSourceTransformer()
        {
            const string imageUrl = "http://example.com/some_image.jpg";

            // Filler text surrounding the image paragraph in the test document.
            const string filler = "<p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p>";

            Func <AttributeTransformationInput, AttributeTransformationResult> rewriteSrc =
                attribute =>
                {
                    var rewritten = new AttributeTransformationResult();
                    rewritten.TransformedValue           = string.Format("http://imageresizer.com/u={0}", attribute.AttributeValue);
                    rewritten.OriginalValueAttributeName = "origsrc";
                    return rewritten;
                };

            // The expected value is whatever the transformer itself yields for the original URL.
            var probe = new AttributeTransformationInput();
            probe.AttributeValue = imageUrl;
            probe.Element        = null;
            string expectedUrl = rewriteSrc(probe).TransformedValue;

            string page = "<html><body>" + filler + "<p><img src=\"" + imageUrl + "\" /></p>" + filler + "</body></html>";

            var transcoder = new NReadabilityTranscoder();
            transcoder.ImageSourceTranformer = rewriteSrc;

            bool   extracted;
            string output = transcoder.Transcode(page, "http://immortal.pl/", out extracted);

            string expectedSrcAttribute  = "src=\"" + expectedUrl + "\"";
            string expectedOrigAttribute = "origsrc=\"" + imageUrl + "\"";

            Assert.IsTrue(extracted);
            Assert.IsTrue(output.Contains(expectedSrcAttribute));
            Assert.IsTrue(output.Contains(expectedOrigAttribute));
        }
        /// <summary>
        /// Verifies that an anchor-href transformer assigned to the transcoder rewrites the
        /// <c>href</c> attribute and that the original value is kept under <c>orighref</c>.
        /// </summary>
        public void TestAnchorHrefTransformer()
        {
            const string articleUrl = "http://example.com/some_article.html";

            // Filler text surrounding the link paragraph in the test document.
            const string filler = "<p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p>";

            Func <AttributeTransformationInput, AttributeTransformationResult> rewriteHref =
                attribute =>
                {
                    var rewritten = new AttributeTransformationResult();
                    rewritten.TransformedValue           = string.Format("http://redirector.com/u={0}", attribute.AttributeValue);
                    rewritten.OriginalValueAttributeName = "orighref";
                    return rewritten;
                };

            // The expected value is whatever the transformer itself yields for the original URL.
            var probe = new AttributeTransformationInput();
            probe.AttributeValue = articleUrl;
            probe.Element        = null;
            string expectedUrl = rewriteHref(probe).TransformedValue;

            string page = "<html><body>" + filler + "<p><a href=\"" + articleUrl + "\">Some article</a></p>" + filler + "</body></html>";

            var transcoder = new NReadabilityTranscoder();
            transcoder.AnchorHrefTranformer = rewriteHref;

            bool   extracted;
            string output = transcoder.Transcode(page, "http://immortal.pl/", out extracted);

            string expectedHrefAttribute = "href=\"" + expectedUrl + "\"";
            string expectedOrigAttribute = "orighref=\"" + articleUrl + "\"";

            Assert.IsTrue(extracted);
            Assert.IsTrue(output.Contains(expectedHrefAttribute));
            Assert.IsTrue(output.Contains(expectedOrigAttribute));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads <paramref name="textStream"/> as text, stores it in <c>_rawHTML</c>, and passes it
        /// to the transcoder together with serialization settings derived from
        /// <paramref name="options"/>.
        /// </summary>
        /// <param name="uri">Address of the page; handed to the transcoder as the input URL.</param>
        /// <param name="textStream">
        /// Stream containing the raw HTML. It is rewound to the beginning when it supports seeking,
        /// otherwise it is read from its current position. The stream is not closed by this method.
        /// </param>
        /// <param name="options">Flags controlling how the transcoded document is serialized.</param>
        /// <param name="encoding">Encoding used to decode the stream; UTF-8 when <c>null</c>.</param>
        /// <returns>The result returned by the transcoder for the decoded HTML.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="uri"/>, <paramref name="textStream"/> or <paramref name="options"/> is <c>null</c>.
        /// </exception>
        protected TranscodingResult ExtractReadableInformation(
            Uri uri,
            Stream textStream,
            ReadOptions options,
            Encoding encoding = null)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException further down.
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }

            if (textStream == null)
            {
                throw new ArgumentNullException("textStream");
            }

            if (options == null)
            {
                throw new ArgumentNullException("options");
            }

            // Rewind only when possible: setting Position on a non-seekable stream throws.
            if (textStream.CanSeek)
            {
                textStream.Position = 0;
            }

            // The reader is intentionally not disposed: disposing it would also close textStream.
            StreamReader streamReader = new StreamReader(textStream, encoding ?? Encoding.UTF8);

            _rawHTML = streamReader.ReadToEnd();

            // set properties for processing
            TranscodingInput transcodingInput = new TranscodingInput(_rawHTML)
            {
                Url = uri.ToString(),
                DomSerializationParams = new DomSerializationParams()
                {
                    BodyOnly    = !options.HasHeaderTags,
                    NoHeadline  = !options.HasHeadline,
                    PrettyPrint = options.PrettyPrint,
                    DontIncludeContentTypeMetaElement     = true,
                    DontIncludeMobileSpecificMetaElements = true,
                    DontIncludeDocTypeMetaElement         = false,
                    DontIncludeGeneratorMetaElement       = true,
                    ReplaceImagesWithPlaceholders         = options.ReplaceImagesWithPlaceholders
                }
            };

            // process/transcode HTML
            return _transcoder.Transcode(transcodingInput);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, extracts its readable content with
        /// NReadability, and returns the plain text of that content.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The inner text of the extracted document's <c>body</c> element, or of the whole
        /// extracted document when it contains no <c>body</c> element.
        /// </returns>
        private static String GetWebpageContents(String url)
        {
            var nreadabilityTranscoder = new NReadabilityTranscoder();

            using (var wc = new WebClient())
            {
                var rawHtml          = wc.DownloadString(url);
                var transcodingInput = new TranscodingInput(rawHtml);
                var extractedHtml    = nreadabilityTranscoder.Transcode(transcodingInput).ExtractedContent;

                var pageHtml = new HtmlDocument();
                pageHtml.LoadHtml(extractedHtml);

                // SelectSingleNode returns null when nothing matches; fall back to the document
                // root instead of failing with a NullReferenceException.
                var bodyNode = pageHtml.DocumentNode.SelectSingleNode("//body");
                return (bodyNode ?? pageHtml.DocumentNode).InnerText;
            }
        }
Exemplo n.º 6
0
    /// <summary>
    /// Command-line entry point: reads the HTML file named by the first argument, extracts its
    /// readable content, and writes that content to the file named by the second argument.
    /// Shows the usage text and exits with code 1 unless exactly two arguments are given.
    /// </summary>
    /// <param name="args">Input file path followed by output file path.</param>
    private static void Main(string[] args)
    {
      bool hasBothPaths = args != null && args.Length == 2;

      if (!hasBothPaths)
      {
        DisplayUsage();
        Environment.Exit(1);
      }

      string sourcePath = args[0];
      string targetPath = args[1];

      var transcoder = new NReadabilityTranscoder();

      string sourceHtml = File.ReadAllText(sourcePath);
      var transcodingResult = transcoder.Transcode(new TranscodingInput(sourceHtml));

      File.WriteAllText(targetPath, transcodingResult.ExtractedContent);
    }
Exemplo n.º 7
0
        /// <summary>
        /// Command-line entry point: reads the HTML file named by the first argument, transcodes
        /// it, and writes the transcoded text to the file named by the second argument.
        /// Shows the usage text and exits with code 1 unless exactly two arguments are given.
        /// </summary>
        /// <param name="args">Input file path followed by output file path.</param>
        private static void Main(string[] args)
        {
            bool hasBothPaths = args != null && args.Length == 2;

            if (!hasBothPaths)
            {
                DisplayUsage();
                Environment.Exit(1);
            }

            string sourcePath = args[0];
            string targetPath = args[1];

            var transcoder = new NReadabilityTranscoder();

            // The extraction flag is required by the API but is not inspected here.
            bool   extracted;
            string sourceHtml = File.ReadAllText(sourcePath);
            string readable   = transcoder.Transcode(sourceHtml, out extracted);

            File.WriteAllText(targetPath, readable);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Downloads the page at <paramref name="q"/>, optionally reduces it to its readable
        /// content, injects a <c>base</c> element so relative links resolve against the original
        /// address, optionally mails the result, and returns the HTML.
        /// </summary>
        /// <param name="q">Address of the page to fetch. Empty or missing yields 404.</param>
        /// <param name="e">When non-empty, the result is also passed to <c>SendMailAsync</c> with this value.</param>
        /// <param name="f">When exactly "y", the full downloaded page is returned instead of the extracted content.</param>
        /// <returns>
        /// 200 with the HTML on success, 404 when <paramref name="q"/> is empty, 400 with the
        /// exception message when anything fails.
        /// </returns>
        public async Task <IActionResult> Get([FromQuery] string q, [FromQuery] string e, [FromQuery] string f)
        {
            if (string.IsNullOrEmpty(q))
            {
                return NotFound();
            }

            var transcoder = new NReadabilityTranscoder();

            try
            {
                string content;

                // NOTE(review): q is caller-supplied and fetched server-side; consider restricting
                // the allowed schemes/hosts if this endpoint is reachable by untrusted clients.
                using (var wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    // Await the download instead of blocking a request thread.
                    content = await wc.DownloadStringTaskAsync(q);
                }

                var transcodedContent = transcoder.Transcode(new TranscodingInput(content));

                // Anything other than f == "y" (including null/empty) returns the extracted content.
                if (f != "y")
                {
                    content = transcodedContent.ExtractedContent;
                }

                // Fix relative path error
                content = InsertBaseHref(content, q);

                if (!string.IsNullOrEmpty(e))
                {
                    await SendMailAsync(e, transcodedContent.ExtractedTitle, content, q);
                }

                return Ok(content);
            }
            catch (Exception ex)
            {
                return BadRequest(ex.Message);
            }
        }

        /// <summary>
        /// Inserts <c>&lt;base href='...' /&gt;</c> right after the opening <c>head</c> tag of
        /// <paramref name="html"/>. The markup is returned unchanged when it has no complete
        /// opening <c>head</c> tag.
        /// </summary>
        /// <param name="html">Markup to patch; must not be <c>null</c>.</param>
        /// <param name="baseUrl">Address written, HTML-encoded, into the <c>href</c> attribute.</param>
        /// <returns>The markup with the <c>base</c> element inserted, or the original markup.</returns>
        private static string InsertBaseHref(string html, string baseUrl)
        {
            const string headOpen = "<head";
            int searchFrom = 0;

            while (true)
            {
                int posHead = html.IndexOf(headOpen, searchFrom, StringComparison.OrdinalIgnoreCase);
                if (posHead < 0)
                {
                    return html;
                }

                int afterName = posHead + headOpen.Length;

                // Accept "<head>" / "<head ...>" only, so "<header>" is not mistaken for it.
                bool isHeadTag = afterName < html.Length
                                 && (html[afterName] == '>' || char.IsWhiteSpace(html[afterName]));

                if (isHeadTag)
                {
                    int endHead = html.IndexOf('>', afterName);
                    if (endHead < 0)
                    {
                        // Unterminated tag: there is no safe place to insert.
                        return html;
                    }

                    // Encode the address so quotes or angle brackets in it cannot break out of the attribute.
                    string baseTag = string.Format("<base href='{0}' />", WebUtility.HtmlEncode(baseUrl));
                    return html.Insert(endHead + 1, baseTag);
                }

                searchFrom = afterName;
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Runs NReadability over <paramref name="content"/> and packs the title, the
        /// <c>og:image</c> address, and the main content markup into a <c>CleanText</c>.
        /// </summary>
        /// <param name="url">Address the content came from; copied into the result.</param>
        /// <param name="content">Raw HTML to process.</param>
        /// <returns>
        /// The populated <c>CleanText</c>. When no content could be extracted, or an exception
        /// occurs, the title is "#FAIL#"; in the exception case the message is stored in
        /// <c>Image</c>.
        /// </returns>
        private static CleanText getCleanText(string url, string content)
        {
            var transcoder = new NReadabilityTranscoder();
            try
            {
                TranscodingResult textRes = transcoder.Transcode(new TranscodingInput(content));

                if (!textRes.ContentExtracted)
                {
                    return new CleanText { Title = "#FAIL#", Image = "", Content = "", Url = url, FetchDate = DateTime.Now };
                }

                var title = "";
                if (textRes.TitleExtracted)
                {
                    title = textRes.ExtractedTitle;
                }
                else
                {
                    // FirstOrDefault, not First: a document without a <title> must not throw,
                    // otherwise the null check below can never be reached.
                    var titleNode = transcoder.FoundDocument.GetElementsByTagName("title").FirstOrDefault();
                    if (titleNode != null)
                    {
                        title = titleNode.Value;
                    }
                }

                // Same reasoning: pages without an og:image meta element simply get an empty image.
                var imgUrl  = "";
                var imgNode = transcoder.FoundDocument
                    .GetElementsByTagName("meta")
                    .FirstOrDefault(e => e.GetAttributeValue("property", "") == "og:image");
                if (imgNode != null)
                {
                    imgUrl = imgNode.GetAttributeValue("content", "");
                }

                var mainText = "";
                if (transcoder.FoundContentElement != null)
                {
                    mainText = transcoder.FoundContentElement.GetInnerHtml();
                }

                return new CleanText { Title = title, Image = imgUrl, Content = mainText, Url = url, FetchDate = DateTime.Now };
            }
            catch (Exception ex)
            {
                // The exception message is stored in the Image field of the failure result.
                return new CleanText { Title = "#FAIL#", Image = ex.Message, Content = "", Url = url, FetchDate = DateTime.Now };
            }
        }
        /// <summary>
        /// Transcodes one numbered sample page, writes the output under "SampleOutput", and checks
        /// that phrases expected for that sample are present in the output and that main content
        /// was reported as extracted.
        /// </summary>
        /// <param name="sampleInputNumber">
        /// Number of the sample file; it is zero-padded to two digits to build the file names.
        /// </param>
        // TODO: if time, add test case 7 (the sample is already in the repo but needs fixing)
        public void TestSampleInputs([Values(1, 2, 3, 4, 5, 6, 8, 9)] int sampleInputNumber)
        {
            string sampleInputNumberStr = sampleInputNumber.ToString().PadLeft(2, '0');
            string content = File.ReadAllText(string.Format(@"SampleInput\SampleInput_{0}.html", sampleInputNumberStr));
            bool   mainContentExtracted;
            string transcodedContent = _nReadabilityTranscoder.Transcode(content, out mainContentExtracted);

            const string outputDir = "SampleOutput";

            if (!Directory.Exists(outputDir))
            {
                Directory.CreateDirectory(outputDir);
            }

            // The output is written before any assertion runs, so it is on disk even when the test fails.
            File.WriteAllText(
                Path.Combine(outputDir, string.Format("SampleOutput_{0}.html", sampleInputNumberStr)),
                transcodedContent,
                Encoding.UTF8);

            // Per-sample checks: each case asserts phrases that must appear in the transcoded output.
            switch (sampleInputNumber)
            {
            case 1: // washingtonpost.com - "Court Puts Off Decision On Indefinite Detention"
                Assert.IsTrue(transcodedContent.Contains("The Supreme Court yesterday vacated a lower"));
                Assert.IsTrue(transcodedContent.Contains("The justices did not rule on the merits"));
                Assert.IsTrue(transcodedContent.Contains("But the government said the issues were now"));
                break;

            case 2: // devBlogi.pl - "Po co nam testerzy?"
                Assert.IsTrue(transcodedContent.Contains("Moja siostra sprawiła swoim dzieciom szczeniaczka"));
                Assert.IsTrue(transcodedContent.Contains("Z tresowaniem psów jest tak, że reakcja musi być"));
                Assert.IsTrue(transcodedContent.Contains("Korzystając z okazji, chcielibyśmy dowiedzieć się"));
                break;

            case 3: // codinghorror.com - "Welcome Back Comments"
                Assert.IsTrue(transcodedContent.Contains("I apologize for the scarcity of updates lately."));
                Assert.IsTrue(transcodedContent.Contains("Most of all, I blame myself."));
                Assert.IsTrue(transcodedContent.Contains("And, most of all, thanks to"));
                break;

            case 4: // sample page; only with paragraphs
                Assert.IsTrue(transcodedContent.Contains("Lorem ipsum dolor sit amet, consectetur adipiscing elit."));
                Assert.IsTrue(transcodedContent.Contains("Mauris nec massa ante, id fringilla nisi."));
                Assert.IsTrue(transcodedContent.Contains("Nulla facilisi. Proin lacinia venenatis elit, nec ornare elit varius eu."));
                Assert.IsTrue(transcodedContent.Contains("Duis vitae ultricies nibh."));
                Assert.IsTrue(transcodedContent.Contains("Vestibulum dictum iaculis nisl, lobortis luctus justo porttitor eu."));
                break;

            case 5: // mnmlist.com - "clear distractions"
                Assert.IsTrue(transcodedContent.Contains("When it comes to minimalism in"));
                Assert.IsTrue(transcodedContent.Contains("Here’s how:"));
                Assert.IsTrue(transcodedContent.Contains("Set limits on your work hours. If your time is limited, you’ll find ways to make the most of that limited time."));
                break;

            case 6:                                                       // sample page; nbsp
                Assert.IsTrue(transcodedContent.Contains("1.  Item 1.")); // there's a non-breaking space here
                break;

            // Not reachable through the [Values] list above, which omits 7 (see the TODO).
            case 7: // http://nplusonemag.com/treasure-island
                Assert.IsTrue(transcodedContent.Contains("stretched out storylines"));
                Assert.IsTrue(transcodedContent.Contains("It is no longer a smart social move to brag about not owning a television."));
                Assert.IsTrue(transcodedContent.Contains("Of course, some habits can be hard to give up completely."));
                break;

            case 8: // NYTimes leading paragraph
                Assert.IsTrue(transcodedContent.Contains("freed from house arrest on Saturday, setting her on the path"));
                Assert.IsTrue(transcodedContent.Contains("confrontation with the generals who had kept her out of the public eye"));
                Assert.IsTrue(transcodedContent.Contains("Western capitals was one of celebration"));
                break;

            case 9: // http://www.udidahan.com/2010/08/31/race-conditions-dont-exist/ - rich sidebar should not be identified as main content
                Assert.IsTrue(transcodedContent.Contains("Not in the business world anyway."));
                Assert.IsTrue(transcodedContent.Contains("we could look at modeling the acceptance"));
                Assert.IsTrue(transcodedContent.Contains("Keep an eye out."));
                break;

            default:
                throw new NotSupportedException("Unknown sample input number (" + sampleInputNumber + "). Have you added another sample input? If so, then add appropriate asserts here as well.");
            }

            Assert.IsTrue(mainContentExtracted);
        }
        /// <summary>
        /// Verifies, through the <c>TranscodingInput</c> API, that an image-source transformer
        /// rewrites <c>src</c> and that the original value is kept under <c>origsrc</c>.
        /// </summary>
        public void TestImageSourceTransformer()
        {
            // arrange
            const string imageUrl = "http://example.com/some_image.jpg";
            const string filler = "<p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p>";

            Func<AttributeTransformationInput, AttributeTransformationResult> rewriteSrc =
                attribute =>
                {
                    var rewritten = new AttributeTransformationResult();
                    rewritten.TransformedValue = string.Format("http://imageresizer.com/u={0}", attribute.AttributeValue);
                    rewritten.OriginalValueAttributeName = "origsrc";
                    return rewritten;
                };

            // The expected value is whatever the transformer itself yields for the original URL.
            var probe = new AttributeTransformationInput();
            probe.AttributeValue = imageUrl;
            probe.Element = null;
            string expectedUrl = rewriteSrc(probe).TransformedValue;

            string page = "<html><body>" + filler + "<p><img src=\"" + imageUrl + "\" /></p>" + filler + "</body></html>";

            var transcoder = new NReadabilityTranscoder();
            transcoder.ImageSourceTranformer = rewriteSrc;

            var request = new TranscodingInput(page);
            request.Url = "http://immortal.pl/";

            // act
            TranscodingResult outcome = transcoder.Transcode(request);

            // assert
            Assert.IsTrue(outcome.ContentExtracted);
            Assert.IsTrue(outcome.ExtractedContent.Contains("src=\"" + expectedUrl + "\""));
            Assert.IsTrue(outcome.ExtractedContent.Contains("origsrc=\"" + imageUrl + "\""));
        }
        /// <summary>
        /// Verifies, through the <c>TranscodingInput</c> API, that an anchor-href transformer
        /// rewrites <c>href</c> and that the original value is kept under <c>orighref</c>.
        /// </summary>
        public void TestAnchorHrefTransformer()
        {
            // arrange
            const string articleUrl = "http://example.com/some_article.html";
            const string filler = "<p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p><p>Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet. Lorem ipsum dolor et amet.</p>";

            Func<AttributeTransformationInput, AttributeTransformationResult> rewriteHref =
                attribute =>
                {
                    var rewritten = new AttributeTransformationResult();
                    rewritten.TransformedValue = string.Format("http://redirector.com/u={0}", attribute.AttributeValue);
                    rewritten.OriginalValueAttributeName = "orighref";
                    return rewritten;
                };

            // The expected value is whatever the transformer itself yields for the original URL.
            var probe = new AttributeTransformationInput();
            probe.AttributeValue = articleUrl;
            probe.Element = null;
            string expectedUrl = rewriteHref(probe).TransformedValue;

            string page = "<html><body>" + filler + "<p><a href=\"" + articleUrl + "\">Some article</a></p>" + filler + "</body></html>";

            var transcoder = new NReadabilityTranscoder();
            transcoder.AnchorHrefTranformer = rewriteHref;

            var request = new TranscodingInput(page);
            request.Url = "http://immortal.pl/";

            // act
            TranscodingResult outcome = transcoder.Transcode(request);

            // assert
            Assert.IsTrue(outcome.ContentExtracted);
            Assert.IsTrue(outcome.ExtractedContent.Contains("href=\"" + expectedUrl + "\""));
            Assert.IsTrue(outcome.ExtractedContent.Contains("orighref=\"" + articleUrl + "\""));
        }
Exemplo n.º 13
0
        /// <summary>
        /// Processes a crawled page. Every <c>div.result</c> element becomes a <c>BaiduNews</c>
        /// entry (stored under "News") and its link is queued as a target request. When the page
        /// yields no such entries, its readable text is extracted with NReadability and stored
        /// under "UpdateNews" together with the raw HTML.
        /// </summary>
        /// <param name="page">The page being processed; result items and target requests are added to it.</param>
        protected override void Handle(Page page)
        {
            // The time is taken to be whatever follows this separator in the author text.
            const string timeSeparator = "&nbsp;&nbsp;";

            var elements = page.Selectable.SelectList(Selectors.XPath("//div[@class='result']")).Nodes();
            var results  = new List <BaiduNews>();
            var keyword  = page.Request.Extras.Aggregate("", (current, kv) => string.IsNullOrEmpty(current) ? kv.Value : $"{current},{kv.Value}");

            foreach (var element in elements)
            {
                var title  = element.Select(Selectors.XPath("h3[@class='c-title']/a")).GetValue().Replace("<em>", "").Replace("</em>", "");
                var url    = element.Select(Selectors.XPath("h3[@class='c-title']/a/@href")).GetValue();
                var author = element.Select(Selectors.XPath(".//div/p[@class='c-author']/text()")).GetValue();

                // Only slice when the separator is really present. Previously a missing separator
                // made IndexOf return -1, so Substring(11) produced garbage or threw and aborted
                // the whole page.
                var time = string.Empty;
                if (author != null)
                {
                    var separatorIndex = author.IndexOf(timeSeparator, StringComparison.Ordinal);
                    if (separatorIndex >= 0)
                    {
                        time = author.Substring(separatorIndex + timeSeparator.Length);
                    }
                }

                var news = new BaiduNews
                {
                    Keyword = keyword,
                    Title   = title,
                    Time    = time,
                    Url     = url
                };
                page.AddTargetRequest(url, increaseDeep: false);

                results.Add(news);
            }
            page.AddResultItem("News", results);

            if (results.Any())
            {
                return;
            }

            // No result entries on this page: extract its readable text instead.
            var transcoder = new NReadabilityTranscoder();
            var input      = new TranscodingInput(page.Content);
            var text       = "";
            try
            {
                var result   = transcoder.Transcode(input);
                var document = new HtmlDocument {
                    OptionAutoCloseOnEnd = true
                };
                document.LoadHtml(result.ExtractedContent);

                // SelectSingleNode returns null when nothing matches; keep the text empty then.
                var node = document.DocumentNode.SelectSingleNode("//div/div/div/div");
                if (node != null)
                {
                    text = node.InnerText.Trim('\r', '\n', ' ');
                }
            }
            catch (Exception e)
            {
                // Best effort: a failed extraction is logged and the item is stored with empty text.
                Console.WriteLine(e);
            }

            page.AddResultItem("UpdateNews", new UpdateNews
            {
                Html = page.Content,
                Text = text,
                Url  = page.Url
            });
        }
Exemplo n.º 14
0
        /// <summary>
        /// Downloads <paramref name="url"/>, decodes it as UTF-8 or GBK, extracts the readable
        /// text with NReadability, writes that text to the job console, and stores the HTML and
        /// text in <c>[dbo].[BaiduNews]</c> while incrementing the matching
        /// <c>[dbo].[Monitor]</c> counter.
        /// </summary>
        /// <param name="url">Address of the page; also the key of the row that is updated.</param>
        /// <param name="context">Job context used for console output.</param>
        /// <returns>A task that completes when the download and database updates are finished.</returns>
        public async Task Dowload(string url, PerformContext context)
        {
            // NOTE(review): a new HttpClient per call can exhaust sockets under load; consider a
            // shared instance if this job runs frequently.
            using (var client = new HttpClient())
            {
                client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36");

                byte[] bytes;
                using (var response = await client.GetAsync(url))
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return;
                    }

                    // Read the whole body in one call. Sizing a buffer from Stream.Length and
                    // issuing a single ReadAsync can fail on non-seekable streams and may return
                    // fewer bytes than requested.
                    bytes = await response.Content.ReadAsByteArrayAsync();
                }

                var isUTF8 = IsTextUTF8(ref bytes);

                // Registered before GetEncoding("GBK") is called, as in the original ordering.
                Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
                var encoding = isUTF8 ? Encoding.UTF8 : Encoding.GetEncoding("GBK");

                var html = encoding.GetString(bytes);
                if (string.IsNullOrEmpty(html))
                {
                    return;
                }

                var transcoder = new NReadabilityTranscoder();
                var input      = new TranscodingInput(html);
                try
                {
                    var result = transcoder.Transcode(input);

                    var document = new HtmlDocument {
                        OptionAutoCloseOnEnd = true
                    };
                    document.LoadHtml(result.ExtractedContent);

                    // SelectSingleNode returns null when nothing matches; report that explicitly
                    // instead of relying on a NullReferenceException, and skip the database update.
                    var node = document.DocumentNode.SelectSingleNode("//div/div/div/div");
                    if (node == null)
                    {
                        context.WriteLine("No content node found in the extracted document.");
                        return;
                    }

                    var text = node.InnerText.Trim('\r', '\n', ' ', '\t');
                    context.WriteLine("抽取内容为:");
                    context.WriteLine(text);

                    const string cmdText = @"UPDATE [dbo].[BaiduNews] SET [Html]=@Html,[Text]=@Text WHERE [Url]=@Url";

                    await _connection.ExecuteAsync(cmdText, new { Html = html, Text = text, Url = url });

                    await _connection.ExecuteAsync(
                        @"UPDATE a SET a.[NewsCount]=a.[NewsCount]+1 FROM [dbo].[Monitor] a JOIN [dbo].[BaiduNews] b ON a.[Tag]=b.[Keyword] WHERE b.[Url]=@Url",
                        new { Url = url });
                }
                catch (Exception e)
                {
                    // Best effort: failures are written to the job console rather than rethrown.
                    context.WriteLine(e);
                }
            }
        }