コード例 #1
0
 /// <summary>
 /// Downloads the image at <paramref name="url"/> and writes it, re-encoded as PNG,
 /// into <paramref name="destinationStream"/>.
 /// </summary>
 /// <param name="wc">Web client used to fetch the raw image bytes.</param>
 /// <param name="destinationStream">Stream that receives the PNG-encoded image.</param>
 /// <param name="url">Address of the image to download.</param>
 /// <remarks>
 /// A <see cref="WebException"/> is reported through <c>ConsoleDisplay</c> and the download is
 /// retried after a one-second pause; there is no upper bound on the number of attempts.
 /// Any other exception (for example, data that is not a valid image) propagates to the caller.
 /// </remarks>
 protected void DownloadImage(WebClient wc, MemoryStream destinationStream, string url)
 {
     while (true)
     {
         try
         {
             // The bitmap is declared after the stream so it is disposed first:
             // GDI+ needs the source stream to stay open for the bitmap's whole lifetime.
             using (var ms = new MemoryStream(wc.DownloadData(url)))
             using (var bm = new Bitmap(ms))
             {
                 bm.Save(destinationStream, ImageFormat.Png);
                 return;
             }
         }
         catch (WebException ex)
         {
             // Back off briefly before the next attempt, then report the failure.
             Task.Delay(1000).Wait();
             ConsoleDisplay.AddAdditionalMessageDisplay(this._entry, $"{url} : {ex.Message}");
         }
     }
 }
コード例 #2
0
        // Valid JSON configuration for this class
        // {
        // "Parser": "XPath",
        // "BaseAddress": "http://beyondtheimpossible.org/comic/1-before-the-beginning-2/",
        // "NextButtonSelector": "//@href[@class='comic-nav-base comic-nav-next']",
        // "ChapterTitleSelector": "//*[@class='post-title']",
        // "ChapterContentSelector": "//*[@class='entry']",
        // "Author": "Ffurla",
        // "Date": "2016-03-18T13:24:36.2855417+01:00",
        // "Title": "Beyond the Impossible",
        // "Description": null
        // }
        /// <summary>
        /// Scrapes the comic page by page, starting at <paramref name="nextPageUrl"/> (or at
        /// <c>entry.BaseAddress</c> when that is null), and adds each page's content to
        /// <paramref name="ebook"/>.
        /// </summary>
        /// <param name="entry">Configuration supplying the XPath selectors, the content type and the address pattern.</param>
        /// <param name="ebook">Document handed to <c>AddPage</c>, <c>AddImage</c> and <c>AddCompositePage</c>.</param>
        /// <param name="nextPageUrl">Address of the first page to scrape; null to start from <c>entry.BaseAddress</c>.</param>
        /// <remarks>
        /// Scraping stops when the next-button selector matches nothing, when a page answers
        /// HTTP 404, or when the <c>IsEmpty()</c> check on the next address succeeds.
        /// Any other <see cref="WebException"/> is reported through <c>ConsoleDisplay</c> and the
        /// same address is retried after a one-second pause.
        /// </remarks>
        protected override void ScrapeWebPage(WebComicEntry entry, Document ebook, string nextPageUrl = null)
        {
            // http://htmlagilitypack.codeplex.com/wikipage?title=Examples
            do
            {
                string content = string.Empty;
                string title = null;
                var titleFound = false;
                var currentUrl = nextPageUrl ?? entry.BaseAddress;
                try
                {
                    using (var wc = new WebClient())
                    {
                        using (var ms = new MemoryStream(wc.DownloadData(currentUrl)))
                        {
                            HtmlDocument hDoc = new HtmlDocument();
                            hDoc.Load(ms, true);
                            XPathNavigator xNav = hDoc.CreateNavigator();

                            try
                            {
                                var titleNode = xNav.SelectSingleNode(entry.ChapterTitleSelector);
                                if (titleNode != null)
                                {
                                    title = titleNode.Value;
                                    titleFound = true;
                                }
                            }
                            catch
                            {
                                // Deliberate best-effort: a title selector that cannot be evaluated
                                // is handled exactly like a title that is missing from the page.
                            }

                            if (!titleFound)
                            {
                                if (entry.IgnoreMissingChapterName)
                                {
                                    title = null;
                                }
                                else
                                {
                                    ConsoleDisplay.AddAdditionalMessageDisplay(
                                        entry,
                                        $"Title not found for page {this.PageCounter}, replacing with default value");
                                    title = WebUtility.HtmlEncode($"Chapter - {this.PageCounter}");
                                }
                            }

                            XPathNodeIterator xIter = xNav.Select(entry.ChapterContentSelector);

                            if (entry.Content == WebComicEntry.ContentType.Text)
                            {
                                content += $"<h1>{title}</h1>";

                                // Re-wrap each matched node's text in an element of the same name.
                                while (xIter.MoveNext())
                                {
                                    content += $"<{xIter.Current.Name}>{xIter.Current.Value}</{xIter.Current.Name}>";
                                }

                                this.AddPage(ebook, content, title, currentUrl, entry.IgnoreMissingChapterName);
                            }
                            else if (entry.Content == WebComicEntry.ContentType.Image)
                            {
                                while (xIter.MoveNext())
                                {
                                    this.AddImage(ebook, wc, xIter.Current.Value, currentUrl);
                                }
                            }
                            else if (entry.Content == WebComicEntry.ContentType.Mixed)
                            {
                                while (xIter.MoveNext())
                                {
                                    var subIter = xIter.Current.SelectChildren(XPathNodeType.Element);
                                    this.AddCompositePage(ebook, subIter, title, wc, currentUrl, entry);
                                }
                            }

                            var tempNextPageUrl = xNav.SelectSingleNode(entry.NextButtonSelector)?.Value;
                            if (tempNextPageUrl == null)
                            {
                                // No next-page link on this page: the end of the book.
                                return;
                            }

                            // An absolute link is followed as-is; anything else is treated as a
                            // fragment to be substituted into the configured address pattern.
                            Uri absoluteUri;
                            nextPageUrl = Uri.TryCreate(tempNextPageUrl, UriKind.Absolute, out absoluteUri)
                                ? tempNextPageUrl
                                : string.Format(entry.AddressPattern, tempNextPageUrl);
                        }
                    }
                }
                catch (WebException ex)
                {
                    // Response is null when the failure produced no HTTP response at all
                    // (DNS error, timeout, refused connection), so it must not be dereferenced blindly.
                    var response = ex.Response as HttpWebResponse;
                    if (response != null && response.StatusCode == HttpStatusCode.NotFound)
                    {
                        return;
                    }

                    // Any other failure: report it and pause before the loop retries the same
                    // address, instead of silently hammering the server.
                    ConsoleDisplay.AddAdditionalMessageDisplay(entry, $"{currentUrl} : {ex.Message}");
                    System.Threading.Thread.Sleep(1000);
                }
            }while (!nextPageUrl.IsEmpty());
        }