Esempio n. 1
0
        /// <summary>
        /// Smoke test: fetches the page referenced by the <c>url</c> member (declared outside this
        /// method), parses it with <see cref="BoilerpipeSAXInput"/> and writes the text returned by
        /// <see cref="ArticleExtractor"/> to the NUnit progress output.
        /// The method makes no assertions; it only fails if one of the calls throws.
        /// </summary>
        public void BoilerpipeSAXInputTest()
        {
            // Fetch the page and wrap it as the input source consumed by the SAX parser.
            var source = HTMLFetcher.Fetch(url).ToInputSource();
            var saxInput = new BoilerpipeSAXInput(source);
            TextDocument document = saxInput.GetTextDocument();

            // Run the article extractor over the parsed document and print the result.
            // (document.Title and document.DebugString() can be printed the same way when debugging.)
            var extracted = ArticleExtractor.INSTANCE.GetText(document);
            NUnit.Framework.TestContext.Progress.WriteLine(extracted);
        }
Esempio n. 2
0
 /// <summary>
 /// Fetches the HTML located at the given <see cref="Uri" /> and returns the text extracted from it.
 /// NOTE: This overload is intended mainly for demonstration purposes. If you are going to crawl
 /// the Web, consider calling <see cref="GetText(InputSource)"/> directly instead.
 /// </summary>
 /// <param name="url">The URL pointing to the HTML code.</param>
 /// <returns>The extracted text.</returns>
 /// <exception cref="BoilerpipeProcessingException">
 /// NOTE(review): presumably raised when extraction fails; the throwing code is outside this
 /// method, so confirm against the overload this call delegates to.
 /// </exception>
 public string GetText(Uri url)
 {
     // Download the page, adapt it to an input source, then delegate to the overload that accepts it.
     var htmlSource = HTMLFetcher.Fetch(url).ToInputSource();
     return GetText(htmlSource);
 }