C# (CSharp) HTMLScraper示例

编程语言: C# (CSharp)

类/类型: HTMLScraper

hotexamples.com的示例: 5

C# (CSharp) HTMLScraper - 共找到 5 个示例。这些是从开源项目中提取的、最受好评的 HTMLScraper 真实 C# (CSharp) 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

GetArticleContent(1)

GetBookLinks(1)

GetCoverImage(1)

GetDownloadLink(1)

GetResults(1)

示例#1

显示文件

文件： Default.aspx.cs 项目： savardaaj/EBookDownloader

 /// <summary>
 /// Starts an asynchronous download of <paramref name="downloadPage"/> into
 /// <paramref name="filePath"/>, then blocks the calling thread for ten seconds.
 /// Failures are written to the trace log; no exception escapes this method.
 /// </summary>
 /// <param name="downloadPage">Absolute URL of the resource to download.</param>
 /// <param name="filePath">Local path the downloaded file is written to.</param>
 /// <param name="book">Book whose <c>fileType</c> is appended to the progress label text.</param>
 public void Download(string downloadPage, string filePath, EBook book)
 {
     try
     {
         // Not disposed here on purpose: the transfer can still be in flight when this method returns.
         WebClient wc = new WebClient();
         wc.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0)");
         wc.DownloadProgressChanged += (sender, ex) =>
         {
             lblDownloadPercent.Text = "Downloaded " + ex.BytesReceived + "b of " + ex.TotalBytesToReceive + "b " + book.fileType;
         };
         wc.DownloadFileCompleted += (sender, ex) =>
         {
             if (ex.Cancelled)
             {
                 System.Diagnostics.Trace.TraceWarning("Download cancelled: " + downloadPage);
             }
             else
             {
                 // A failed transfer also arrives here with Cancelled == false; record it
                 // but keep flagging completion exactly as before.
                 if (ex.Error != null)
                 {
                     System.Diagnostics.Trace.TraceError("Download failed: " + downloadPage + " - " + ex.Error);
                 }
                 _completed = true;
             }
         };
         wc.DownloadFileAsync(new Uri(downloadPage), filePath);

         // NOTE(review): fixed 10 s wait; a polling loop on _completed was previously
         // commented out at this point. The wait does not guarantee the file is complete.
         Thread.Sleep(10000);
     }
     catch (Exception e)
     {
         // Best-effort download: report the problem instead of discarding it silently.
         System.Diagnostics.Trace.TraceError("Download of '" + downloadPage + "' to '" + filePath + "' failed: " + e);
     }
 }

示例#2

显示文件

    /// <summary>
    /// Sends an HTTP request to the address formed by concatenating
    /// <paramref name="hostSiteSearchPage"/> and <paramref name="target"/>, and returns
    /// the whole response body as text.
    /// </summary>
    /// <param name="hostSiteSearchPage">Base address, including any query-string prefix.</param>
    /// <param name="target">Text appended to the base address; may be empty.</param>
    /// <returns>The response body read to the end.</returns>
    public string ProcessWebRequest(string hostSiteSearchPage, string target)
    {
        Uri            websiteURI = new Uri(hostSiteSearchPage + target); //declare page to search
        HttpWebRequest request    = (HttpWebRequest)HttpWebRequest.Create(websiteURI);

        request.UserAgent = "A .NET Web Crawler";

        // Dispose the response, its stream and the reader deterministically, even if reading throws.
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

示例#3

显示文件

文件： Default.aspx.cs 项目： savardaaj/EBookDownloader

    /// <summary>
    /// Main function for the program: reads the title, author and download folder from the
    /// form, searches the host site, downloads the book (and a cover image for .pdf/.epub),
    /// then passes the book and the address in <c>tbKindleEmail</c> to
    /// <c>EmailHandler.SendEmail</c>. Failures are reported in <c>tbLog</c>.
    /// </summary>
    // TODO: init
    public void DownloadBook()
    {
        EBook             book         = new EBook();
        WebRequestHandler wrh          = new WebRequestHandler();
        HTMLScraper       scraper      = new HTMLScraper();
        EmailHandler      emailHandler = new EmailHandler();

        string hostSiteSearchPage = "http://libgen.io/search.php?req=";

        try
        {
            tbLog.Text += "Initializing";
            lblDownloadPercent.Text = "Downloading";

            book.bookTitle = tbBookName.Text;
            book.author    = tbAuthor.Text;
            string downloadFolder = tbDownloadFolder.Text;

            Directory.CreateDirectory(downloadFolder);

            // "<title> - <author>" is used both for the search query and for the file name.
            string baseName = book.bookTitle + " - " + book.author;

            book.uriBookTitle       = Regex.Replace(baseName, @"\s+", "+");
            book.coverImageLocation = downloadFolder + "\\" + book.bookTitle + " Cover";

            //returns htmlData page for initial search
            string searchHtml = wrh.ProcessWebRequest(hostSiteSearchPage, book.uriBookTitle);

            // Per the original note, GetBookLinks sets book.fileType and returns the
            // address of the page that holds the download link.
            string downloadPage = scraper.GetBookLinks(searchHtml, book);

            // Append the extension exactly once, after it is known, e.g.
            // C:\users\Alex\Downloads\Outliers - Malcolm Gladwell.mobi
            book.fileLocation = downloadFolder + "\\" + baseName + book.fileType;

            string downloadHtml = wrh.ProcessWebRequest(downloadPage, "");
            string downloadLink = scraper.GetDownloadLink(downloadHtml);

            if (string.IsNullOrEmpty(downloadLink))
            {
                tbLog.Text += "No link results";
                return;
            }

            lblDownloadPercent.Text = "Downloading " + book.fileType;
            //Download the EBook
            Download(downloadLink, book.fileLocation, book);

            if (book.fileType == ".pdf" || book.fileType == ".epub")
            {
                string coverImageHref = scraper.GetCoverImage(book);
                if (!string.IsNullOrEmpty(coverImageHref))
                {
                    Download(coverImageHref, book.coverImageLocation, book);
                }
            }

            emailHandler.SendEmail(book, tbKindleEmail.Text);
        }
        catch (Exception s)
        {
            // Report the failure instead of discarding it, without turning it into a crash.
            System.Diagnostics.Trace.TraceError("DownloadBook failed: " + s);
            if (tbLog != null)
            {
                tbLog.Text += "Error: " + s.Message;
            }
        }
    }

示例#4

显示文件

文件： APIController.cs 项目： EJuang/RedditNewspaper

        /// <summary>
        /// Returns, as a JSON result that also permits GET requests, the string produced by
        /// <c>HTMLScraper.GetArticleContent</c> for <paramref name="targetURL"/>.
        /// </summary>
        /// <param name="targetURL">Value passed unchanged to <c>HTMLScraper.GetArticleContent</c>.</param>
        public ActionResult Index(string targetURL)
        {
            return Json(HTMLScraper.GetArticleContent(targetURL), JsonRequestBehavior.AllowGet);
        }

示例#5

显示文件

        /// <summary>
        /// Builds a Google UK search URL from the submitted form, fetches the result page
        /// through <c>_webService</c>, stores a summary of the positions reported by
        /// <c>HTMLScraper.GetResults</c> in <c>formData.SearchResult</c>, and redirects to
        /// the <c>SearchResults</c> action of the <c>Home</c> controller.
        /// </summary>
        /// <param name="formData">Submitted search terms, result amount and URL to look for.</param>
        public ActionResult SubmitScrape(SearchResultViewModel formData)
        {
            // Escape the user-supplied terms so that spaces, '&', '#', '+' and similar
            // characters cannot break or inject parameters into the query string.
            string encodedTerms = System.Uri.EscapeDataString(formData.SearchTerms ?? string.Empty);
            string urlAddress   = string.Format("https://www.google.co.uk/search?num={0}&q={1}", formData.SearchResultAmount, encodedTerms);
            string rawHTML      = _webService.GetHTML(urlAddress);

            formData.SearchResult = string.Format("The URL appeared in the search results in position(s): {0}", HTMLScraper.GetResults(rawHTML, formData.SearchURL));
            return RedirectToAction("SearchResults", "Home", formData);
        }