public void BoilerpipeSAXInputTest()
{
    // Smoke test: download the page at `url`, parse it into a TextDocument through
    // the SAX-based input, and print the extracted article text to the NUnit
    // progress stream. Nothing is asserted; the output is meant for manual inspection.
    var fetchedPage = HTMLFetcher.Fetch(url);
    var saxInput = new BoilerpipeSAXInput(fetchedPage.ToInputSource());
    TextDocument doc = saxInput.GetTextDocument();

    // Optional diagnostic, disabled by default:
    //NUnit.Framework.TestContext.Progress.WriteLine(doc.Title);

    var articleText = ArticleExtractor.INSTANCE.GetText(doc);
    NUnit.Framework.TestContext.Progress.WriteLine(articleText);

    // Optional diagnostic, disabled by default:
    //NUnit.Framework.TestContext.Progress.WriteLine(doc.DebugString());
}
/// <summary>
/// Extracts text from the HTML code available from the given <see cref="Uri" />.
/// The document is fetched with <c>HTMLFetcher.Fetch</c>, converted to an input source,
/// and passed to <see cref="GetText(InputSource)"/>.
/// </summary>
/// <remarks>
/// NOTE: This method is mainly intended for showcase purposes. If you are going to crawl
/// the Web, consider fetching the content yourself and calling
/// <see cref="GetText(InputSource)"/> instead.
/// </remarks>
/// <param name="url">The URL pointing to the HTML code.</param>
/// <returns>The extracted text.</returns>
/// <exception cref="BoilerpipeProcessingException">
/// May be propagated from the underlying <see cref="GetText(InputSource)"/> call.
/// </exception>
public string GetText(Uri url)
{
    return GetText(HTMLFetcher.Fetch(url).ToInputSource());
}