Example #1
0
        /// <summary>
        /// Downloads the chapter behind <paramref name="link"/> and appends the content of every
        /// additional page returned by <c>GetPagedChapterUrls</c>.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to every download and parse call.</param>
        /// <returns>
        /// The assembled chapter, or <c>null</c> when <c>ParseChapter</c> yields nothing for the first page.
        /// </returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token         = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            var paged = GetPagedChapterUrls(doc.DocumentElement);

            WebNovelChapter chapter = ParseChapter(doc.DocumentElement, token);

            if (chapter == null)
            {
                return(null);
            }

            chapter.Url            = link.Url;
            chapter.NextChapterUrl = UrlHelper.ToAbsoluteUrl(link.Url, chapter.NextChapterUrl);

            foreach (var page in paged)
            {
                string pageContent = await GetWebPageAsync(page, token);

                IHtmlDocument pageDoc = await Parser.ParseAsync(pageContent, token);

                WebNovelChapter pageChapter = ParseChapter(pageDoc.DocumentElement, token);

                // ParseChapter is treated as nullable for the first page, so a follow-up page
                // that cannot be parsed is skipped instead of raising a NullReferenceException.
                if (pageChapter != null)
                {
                    chapter.Content += pageChapter.Content;
                }
            }

            return(chapter);
        }
Example #2
0
        /// <summary>
        /// Fetches the page behind <paramref name="link"/> and returns the cleaned-up HTML of the
        /// first <c>div</c> whose <c>class</c> attribute contains <c>post_body</c>.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The chapter, or <c>null</c> when no matching <c>div</c> exists.</returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token         = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);

            IHtmlDocument document = await Parser.ParseAsync(html, token);

            IElement body = document.All.FirstOrDefault(
                el => el.LocalName == "div" &&
                      el.HasAttribute("class") &&
                      el.GetAttribute("class").Contains("post_body"));

            if (body == null)
            {
                return(null);
            }

            // Strip page chrome in the same order as before; each helper mutates the element in place.
            RemoveNavigation(body);
            RemoveDonation(body);
            ExpandSpoilers(body);
            RemoveEmptyTags(body);

            string cleaned = CleanupHTML(body.InnerHtml);

            WebNovelChapter chapter = new WebNovelChapter();
            chapter.Url     = link.Url;
            chapter.Content = cleaned;

            return(chapter);
        }
Example #3
0
        /// <summary>
        /// Downloads <paramref name="baseUrl"/> and builds a chapter link from the first anchor
        /// inside every <c>li</c> element whose class is exactly <c>chapter</c>.
        /// </summary>
        /// <param name="baseUrl">Address of the page that lists the chapters.</param>
        /// <returns>
        /// The links found, named after each anchor's <c>title</c> attribute; an empty array when
        /// the page has no matching list items.
        /// </returns>
        public override async Task<ChapterLink[]> GetLinks(string baseUrl)
        {
            string baseContent = await GetWebPage(baseUrl);

            HtmlDocument baseDoc = new HtmlDocument();
            baseDoc.LoadHtml(baseContent);

            var chapterNodes = baseDoc.DocumentNode.SelectNodes("//li[@class='chapter']");

            // SelectNodes hands back null (not an empty collection) when nothing matches.
            if (chapterNodes == null)
                return new ChapterLink[0];

            var links = new List<ChapterLink>();
            foreach (HtmlNode chapterNode in chapterNodes)
            {
                HtmlNode linkNode = chapterNode.SelectSingleNode(".//a");

                HtmlAttribute titleAttribute = linkNode?.Attributes["title"];
                HtmlAttribute hrefAttribute = linkNode?.Attributes["href"];

                // Skip list items without a usable anchor instead of failing the whole listing.
                if (titleAttribute == null || hrefAttribute == null)
                    continue;

                ChapterLink link = new ChapterLink
                {
                    Name = titleAttribute.Value,
                    Url = hrefAttribute.Value,
                    Unknown = false
                };

                links.Add(link);
            }

            return links.ToArray();
        }
Example #4
0
        /// <summary>
        /// Lazily turns every element that carries an <c>href</c> attribute into a <see cref="ChapterLink"/>.
        /// </summary>
        /// <remarks>
        /// Hrefs are resolved against the <c>BaseUrl</c> member; the <paramref name="baseUrl"/> and
        /// <paramref name="linkFilter"/> arguments are not read by this override.
        /// </remarks>
        /// <param name="baseUrl">Not read by this override.</param>
        /// <param name="linkElements">Candidate elements to inspect.</param>
        /// <param name="linkFilter">Not read by this override.</param>
        /// <returns>
        /// One link per element with an <c>href</c>; <c>Unknown</c> is <c>false</c> only when the decoded
        /// text contains one of <c>PossibleChapterNameParts</c> (current culture, case-insensitive).
        /// </returns>
        protected override IEnumerable <ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable <IElement> linkElements, Func <IElement, bool> linkFilter = null)
        {
            foreach (IElement candidate in linkElements)
            {
                if (candidate.HasAttribute("href"))
                {
                    string name        = WebUtility.HtmlDecode(candidate.TextContent);
                    string absoluteUrl = UrlHelper.ToAbsoluteUrl(BaseUrl, candidate.GetAttribute("href"));

                    bool looksLikeChapter = PossibleChapterNameParts.Any(
                        part => name.IndexOf(part, StringComparison.CurrentCultureIgnoreCase) >= 0);

                    yield return(new ChapterLink
                    {
                        Name    = name,
                        Url     = absoluteUrl,
                        Unknown = !looksLikeChapter
                    });
                }
            }
        }
Example #5
0
        /// <summary>
        /// Lazily converts anchor elements into <see cref="ChapterLink"/> instances.
        /// </summary>
        /// <param name="baseUrl">Address each <c>href</c> is resolved against.</param>
        /// <param name="linkElements">Candidate elements; only those with local name <c>a</c> are used.</param>
        /// <param name="linkFilter">Optional predicate applied before the anchor check.</param>
        /// <returns>
        /// A link for every anchor that has non-blank text, an <c>href</c> attribute and a non-empty
        /// resolved URL; the name is the HTML-decoded text content.
        /// </returns>
        protected virtual IEnumerable <ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable <IElement> linkElements,
                                                                        Func <IElement, bool> linkFilter = null)
        {
            IEnumerable <IElement> candidates = linkFilter == null ? linkElements : linkElements.Where(linkFilter);
            IEnumerable <IElement> anchors    = candidates.Where(p => p.LocalName == "a");

            foreach (IElement anchor in anchors)
            {
                bool hasText = !string.IsNullOrWhiteSpace(anchor.TextContent);

                if (hasText && anchor.HasAttribute("href"))
                {
                    string resolved = UrlHelper.ToAbsoluteUrl(baseUrl, anchor.GetAttribute("href"));

                    if (!string.IsNullOrEmpty(resolved))
                    {
                        yield return(new ChapterLink
                        {
                            Name = WebUtility.HtmlDecode(anchor.TextContent),
                            Url  = resolved
                        });
                    }
                }
            }
        }
        /// <summary>
        /// Downloads the chapter page and concatenates the inner HTML of every
        /// <c>sentence.translated</c> element found below <c>.chapter-body</c>.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>
        /// The chapter with its name taken from <c>.chapter-title</c> and the next-chapter address
        /// taken from the pager link; <c>null</c> when the page has no <c>.chapter-body</c> element.
        /// </returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement titleElement   = doc.DocumentElement.QuerySelector(".chapter-title");
            IElement chapterElement = doc.DocumentElement.QuerySelector(".chapter-body");

            // Without a chapter body there is nothing to extract; report failure the same way the
            // sibling sources do instead of dereferencing null.
            if (chapterElement == null)
            {
                return(null);
            }

            // Query once and reuse the snapshot for both the paragraph insertion and the join.
            var sentences = chapterElement.QuerySelectorAll("sentence.translated").ToList();

            // Append an empty paragraph inside each sentence so they are separated once joined.
            foreach (IElement sentence in sentences)
            {
                sentence.AppendChild(doc.CreateElement("P"));
            }

            var contentEl = doc.CreateElement("P");

            contentEl.InnerHtml = string.Join("", sentences.Select(x => x.InnerHtml));
            RemoveSpecialTags(doc, contentEl);

            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            return(new WebNovelChapter
            {
                ChapterName = titleElement?.GetInnerText(),
                Content = new ContentCleanup(BaseUrl).Execute(doc, contentEl),
                NextChapterUrl = nextChapter
            });
        }
        /// <summary>
        /// Fetches the page behind <paramref name="link"/> and returns the cleaned content of its
        /// first <c>.chapter-content</c> element.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The chapter, or <c>null</c> when the page has no <c>.chapter-content</c> element.</returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token         = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);

            IHtmlDocument document = await Parser.ParseAsync(html, token);

            IElement chapterBody = document.QuerySelector(".chapter-content");

            if (chapterBody != null)
            {
                // Each helper edits the element in place; the order is kept as-is.
                RemoveNavigation(chapterBody);
                RemoveAdvertisements(chapterBody);
                ExpandSpoilers(chapterBody);
                RemoveFontStyle(chapterBody);

                ContentCleanup cleanup = new ContentCleanup(BaseUrl);
                WebNovelChapter chapter = new WebNovelChapter();

                var cleaned = cleanup.Execute(document, chapterBody);

                chapter.Url     = link.Url;
                chapter.Content = cleaned;

                return(chapter);
            }

            return(null);
        }
        /// <summary>
        /// Fetches the page behind <paramref name="link"/> and returns the cleaned content of the
        /// element with id <c>storytext</c>.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The chapter, or <c>null</c> when the page has no <c>#storytext</c> element.</returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token         = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement storyElement = doc.QuerySelector("#storytext");

            // Mirror the sibling sources: a page without the story container yields no chapter
            // rather than handing a null element to the cleanup step.
            if (storyElement == null)
            {
                return(null);
            }

            return(new WebNovelChapter()
            {
                Content = new ContentCleanup(BaseUrl).Execute(doc, storyElement)
            });
        }
Example #9
0
        /// <summary>
        /// Downloads a wiki-style chapter page, makes every link inside <c>mw-content-text</c>
        /// absolute and swaps linked thumbnails for the full-size image found on the linked page.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <returns>
        /// The chapter with the inner HTML of the content node, or <c>null</c> when the page has no
        /// element with id <c>mw-content-text</c>.
        /// </returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link)
        {
            string baseContent = await GetWebPage(link.Url);

            HtmlDocument baseDoc = new HtmlDocument();
            baseDoc.LoadHtml(baseContent);

            HtmlNode contentNode = baseDoc.GetElementbyId("mw-content-text");

            if (contentNode == null)
                return null;

            baseDoc.GetElementbyId("toc")?.Remove();

            // SelectNodes hands back null when the content holds no anchors at all.
            var linkNodes = contentNode.SelectNodes(".//a");

            if (linkNodes != null)
            {
                foreach (HtmlNode linkNode in linkNodes)
                {
                    HtmlAttribute hrefAttribute = linkNode.Attributes["href"];

                    // Named anchors (<a name="...">) carry no href; leave them untouched.
                    if (hrefAttribute == null)
                        continue;

                    linkNode.SetAttributeValue("href", GetAbsoluteUrl(BaseUrl, WebUtility.HtmlDecode(hrefAttribute.Value)));

                    HtmlNode imgNode = linkNode.SelectSingleNode("img");

                    if (imgNode != null)
                    {
                        // Keep only the sizing attributes; ToList() snapshots the collection so
                        // attributes can be removed while iterating.
                        foreach (HtmlAttribute attrib in imgNode.Attributes.Where(p => p.Name != "width" && p.Name != "height").ToList())
                            attrib.Remove();

                        string linkImgUrl = linkNode.Attributes["href"].Value;
                        string imgPageContent = await GetWebPage(linkImgUrl);

                        HtmlDocument imageDoc = new HtmlDocument();
                        imageDoc.LoadHtml(imgPageContent);

                        HtmlNode fullImageNode = imageDoc.DocumentNode.SelectSingleNode("//div[@class='fullMedia']/a");
                        HtmlAttribute fullImageHref = fullImageNode?.Attributes["href"];

                        if (fullImageHref == null)
                            continue;

                        imgNode.SetAttributeValue("src", GetAbsoluteUrl(BaseUrl, fullImageHref.Value));
                    }
                }
            }

            return new WebNovelChapter
            {
                Url = link.Url,
                Content = contentNode.InnerHtml
            };
        }
Example #10
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/> and returns the inner HTML of the first
        /// <c>div</c> whose class contains <c>post_body</c>, after <c>RemoveNonTables</c> has run on it.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <returns>The chapter, or <c>null</c> when no matching <c>div</c> exists.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link)
        {
            string pageContent = await GetWebPage(link.Url);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(pageContent);

            HtmlNode firstPostNode = doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'post_body')]");

            // SelectSingleNode returns null when the page has no post body.
            if (firstPostNode == null)
                return null;

            RemoveNonTables(firstPostNode);

            return new WebNovelChapter
            {
                Url = link.Url,
                Content = firstPostNode.InnerHtml
            };
        }
Example #11
0
        /// <summary>
        /// Downloads <paramref name="baseUrl"/> and collects every anchor with text inside the
        /// first matching content container.
        /// </summary>
        /// <remarks>
        /// Containers are tried in this order: <c>entry-content</c>, <c>entry</c>,
        /// <c>post-content</c>, <c>the-content</c> (substring match on the <c>class</c> attribute).
        /// </remarks>
        /// <param name="baseUrl">Address of the page that lists the chapters.</param>
        /// <returns>
        /// <c>null</c> when no container is found; otherwise the links, with <c>Unknown</c> set to
        /// <c>false</c> for names containing "chap" (current culture, case-insensitive).
        /// </returns>
        public override async Task<ChapterLink[]> GetLinks(string baseUrl)
        {
            string baseContent = await GetWebPage(baseUrl);

            HtmlDocument baseDoc = new HtmlDocument();
            baseDoc.LoadHtml(baseContent);

            HtmlNode entryNode = baseDoc.DocumentNode.SelectSingleNode("//div[contains(@class, 'entry-content')]")
                                 ?? baseDoc.DocumentNode.SelectSingleNode("//div[contains(@class, 'entry')]")
                                 ?? baseDoc.DocumentNode.SelectSingleNode("//div[contains(@class, 'post-content')]")
                                 ?? baseDoc.DocumentNode.SelectSingleNode("//div[contains(@class, 'the-content')]");

            if (entryNode == null)
                return null;

            var linkNodes = entryNode.SelectNodes(".//a");

            // SelectNodes hands back null when the container holds no anchors.
            if (linkNodes == null)
                return new ChapterLink[0];

            var links = new List<ChapterLink>();
            foreach (HtmlNode linkNode in linkNodes)
            {
                if (string.IsNullOrWhiteSpace(linkNode.InnerText))
                    continue;

                HtmlAttribute hrefAttribute = linkNode.Attributes["href"];

                // Anchors without an href cannot be followed; skip them instead of throwing.
                if (hrefAttribute == null)
                    continue;

                ChapterLink link = new ChapterLink
                {
                    Name = WebUtility.HtmlDecode(linkNode.InnerText),
                    Url = hrefAttribute.Value,
                    Unknown = true
                };

                if (link.Name.IndexOf("chap", StringComparison.CurrentCultureIgnoreCase) >= 0)
                    link.Unknown = false;

                links.Add(link);
            }

            return links.ToArray();
        }
        /// <summary>
        /// Lazily converts anchor elements into <see cref="ChapterLink"/> instances, skipping
        /// social-media share links and flagging links that point at another host.
        /// </summary>
        /// <param name="baseUrl">Address each <c>href</c> is resolved against and compared with.</param>
        /// <param name="linkElements">Candidate elements; only those with local name <c>a</c> are used.</param>
        /// <param name="linkFilter">Optional predicate applied before the anchor check.</param>
        /// <returns>
        /// A link for every usable anchor; <c>Unknown</c> is <c>true</c> when the href is absolute
        /// and its host differs from the host of <paramref name="baseUrl"/>.
        /// </returns>
        protected virtual IEnumerable <ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable <IElement> linkElements,
                                                                        Func <IElement, bool> linkFilter = null)
        {
            if (linkFilter != null)
            {
                linkElements = linkElements.Where(linkFilter);
            }

            linkElements = linkElements.Where(p => p.LocalName == "a");

            // Resolved on first use only, so a page without absolute links never parses baseUrl.
            string baseHost = null;

            foreach (IElement e in linkElements)
            {
                if (string.IsNullOrWhiteSpace(e.TextContent) || !e.HasAttribute("href"))
                {
                    continue;
                }

                // Skip social media share links
                if (e.HasAttribute("Class") && e.GetAttribute("Class").Contains("share-icon") && e.GetAttribute("rel") == "nofollow")
                {
                    continue;
                }

                string href = e.GetAttribute("href");

                // A malformed href would make the Uri constructor throw and abort the whole
                // enumeration; drop just that one link instead.
                Uri hrefUri;
                if (!Uri.TryCreate(href, UriKind.RelativeOrAbsolute, out hrefUri))
                {
                    continue;
                }

                bool otherDomain = false;

                if (hrefUri.IsAbsoluteUri)
                {
                    if (baseHost == null)
                    {
                        baseHost = new Uri(baseUrl, UriKind.Absolute).Host;
                    }

                    otherDomain = hrefUri.Host != baseHost;
                }

                string url = UrlHelper.ToAbsoluteUrl(baseUrl, href);

                if (string.IsNullOrEmpty(url))
                {
                    continue;
                }

                ChapterLink link = new ChapterLink
                {
                    Name    = e.GetInnerText(),
                    Url     = url,
                    Unknown = otherDomain
                };

                yield return(link);
            }
        }
Example #13
0
        /// <summary>
        /// Downloads <paramref name="baseUrl"/> and collects the list-item links found inside the
        /// element with id <c>mw-content-text</c>.
        /// </summary>
        /// <param name="baseUrl">Address of the page that lists the chapters.</param>
        /// <returns>
        /// <c>null</c> when the content element is missing; an empty array when it holds no
        /// <c>ul/li/a</c> links; otherwise the links with absolute URLs. <c>Unknown</c> is
        /// <c>false</c> only for titles containing one of <c>PossibleChapterNameParts</c>.
        /// </returns>
        public override async Task<ChapterLink[]> GetLinks(string baseUrl)
        {
            string baseContent = await GetWebPage(baseUrl);

            HtmlDocument baseDoc = new HtmlDocument();
            baseDoc.LoadHtml(baseContent);

            HtmlNode contentNode = baseDoc.GetElementbyId("mw-content-text");

            if (contentNode == null)
                return null;

            // The leading "." anchors the query to contentNode; a bare "//" would search the whole
            // document (navigation and sidebars included) regardless of the node it is called on.
            var possibleChapters = contentNode.SelectNodes(".//ul/li/a");

            if (possibleChapters == null)
                return new ChapterLink[0];

            var links = new List<ChapterLink>();
            Uri siteUri = new Uri(BaseUrl);

            foreach (HtmlNode possibleChapter in possibleChapters)
            {
                HtmlAttribute hrefAttribute = possibleChapter.Attributes["href"];

                // Anchors without an href cannot be followed.
                if (hrefAttribute == null)
                    continue;

                // A single malformed href must not abort the whole listing.
                Uri resolved;
                if (!Uri.TryCreate(siteUri, hrefAttribute.Value, out resolved))
                    continue;

                string chTitle = WebUtility.HtmlDecode(possibleChapter.InnerText);

                ChapterLink link = new ChapterLink
                {
                    Name = chTitle,
                    Url = resolved.AbsoluteUri,
                    Unknown = true
                };

                if (PossibleChapterNameParts.Any(p => chTitle.IndexOf(p, StringComparison.CurrentCultureIgnoreCase) >= 0))
                    link.Unknown = false;

                links.Add(link);
            }

            return links.ToArray();
        }
Example #14
0
        /// <summary>
        /// Downloads the chapter page and joins the inner HTML of the <c>sentence</c> elements
        /// selected by <c>ChapterContentClasses</c>, separated by double line breaks.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>
        /// The chapter with its name taken from the title element and the next-chapter address
        /// taken from the pager link; <c>null</c> when no element matches <c>ChapterClasses</c>.
        /// </returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link, ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token = default(CancellationToken))
        {
            string content = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(content, token);

            IElement titleElement   = doc.DocumentElement.FirstWhereHasClass(ChapterTitleClasses);
            IElement chapterElement = doc.DocumentElement.FirstWhereHasClass(ChapterClasses);

            // The title is already treated as optional below; give the chapter container the
            // same care and report "no chapter" rather than dereferencing null.
            if (chapterElement == null)
            {
                return(null);
            }

            var chContentElements = chapterElement.WhereHasClass(ChapterContentClasses, element => element.LocalName == "sentence");

            string contents    = string.Join("<br/><br/>", chContentElements.Select(p => p.InnerHtml));
            string nextChapter = doc.QuerySelector("ul.pager > li.next > a")?.GetAttribute("href");

            return(new WebNovelChapter
            {
                ChapterName = titleElement?.TextContent,
                Content = contents,
                NextChapterUrl = nextChapter
            });
        }
        /// <summary>
        /// Downloads the page behind <paramref name="link"/>, parses it with <c>ParseChapter</c>
        /// and names the chapter after the first element matching <c>TitleClasses</c>.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to the download and parse calls.</param>
        /// <returns>The parsed chapter, or <c>null</c> when <c>ParseChapter</c> yields nothing.</returns>
        public override async Task <WebNovelChapter> GetChapterAsync(
            ChapterLink link,
            ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
            CancellationToken token         = default(CancellationToken))
        {
            string baseContent = await GetWebPageAsync(link.Url, token);

            IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

            IElement titleElement = doc.DocumentElement.FirstWhereHasClass(TitleClasses);

            WebNovelChapter chapter = ParseChapter(doc, link.Url, doc.DocumentElement, token);

            // Guard against a page ParseChapter could not handle before touching the result.
            // NOTE(review): assumes ParseChapter may return null - confirm against its definition.
            if (chapter == null)
            {
                return(null);
            }

            chapter.Url = link.Url;

            if (titleElement != null)
            {
                chapter.ChapterName = titleElement.Text().Trim();
            }

            return(chapter);
        }
Example #16
0
        /// <summary>
        /// Lazily builds a <see cref="ChapterLink"/> from the first anchor found below each element.
        /// </summary>
        /// <param name="baseUrl">Not read by this override.</param>
        /// <param name="linkElements">Container elements that are searched for an anchor.</param>
        /// <param name="linkFilter">Not read by this override.</param>
        /// <returns>
        /// One link per container whose first anchor has both a <c>title</c> and an <c>href</c>
        /// attribute; the name is the title and the URL is the raw href.
        /// </returns>
        protected override IEnumerable <ChapterLink> CollectChapterLinks(string baseUrl, IEnumerable <IElement> linkElements, Func <IElement, bool> linkFilter = null)
        {
            foreach (IElement container in linkElements)
            {
                IElement anchor = container.Descendents <IElement>().FirstOrDefault(p => p.LocalName == "a");

                bool usable = anchor != null && anchor.HasAttribute("title") && anchor.HasAttribute("href");

                if (usable)
                {
                    yield return(new ChapterLink
                    {
                        Name    = anchor.GetAttribute("title"),
                        Url     = anchor.GetAttribute("href"),
                        Unknown = false
                    });
                }
            }
        }
Example #17
0
        /// <summary>
        /// Downloads the page behind <paramref name="link"/> and extracts the chapter content from
        /// its <c>article</c> element and/or one of several known post containers.
        /// </summary>
        /// <remarks>
        /// The <c>article</c> element is examined first: an embedded iframe is followed and its
        /// page used as content, otherwise the article's inner HTML is used. A post container
        /// (<c>post-entry</c>, <c>entry-content</c>, <c>post-content</c>, <c>postbody</c>, in that
        /// order) overrides that content when one is present.
        /// </remarks>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <returns>The chapter; content and name are empty strings when nothing was found.</returns>
        public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link)
        {
            string title = string.Empty;
            string pageContent = await GetWebPage(link.Url);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(pageContent);

            HtmlNode articleNode = doc.DocumentNode.SelectSingleNode("//article");

            string content = string.Empty;
            if (articleNode != null)
            {
                HtmlNode ifNode = articleNode.SelectSingleNode(".//iframe");

                // An iframe without a usable src cannot be followed; fall back to the article
                // markup instead of dereferencing a missing attribute.
                string ifUrl = ifNode?.Attributes["src"]?.Value;

                if (!string.IsNullOrWhiteSpace(ifUrl))
                {
                    content = await GetWebPage(ifUrl);
                }
                else
                {
                    RemoveBloat(articleNode);
                    content = articleNode.InnerHtml;
                }
            }

            HtmlNode entryNode = doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'post-entry')]")
                                 ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'entry-content')]")
                                 ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'post-content')]")
                                 ?? doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'postbody')]");

            if (entryNode != null)
            {
                HtmlNode ifNode = entryNode.SelectSingleNode(".//iframe");

                if (ifNode != null)
                {
                    content = ifNode.InnerHtml;
                }
                else
                {
                    RemoveBloat(entryNode);
                    title = retrieveTitle(entryNode);
                    content = entryNode.OuterHtml;
                }
            }

            return new WebNovelChapter
            {
                Url = link.Url,
                Content = content,
                // Null-safe: yields an empty name even if retrieveTitle found nothing.
                ChapterName = title ?? string.Empty
            };
        }
Example #18
0
        /// <summary>
        /// Builds an e-book from the entries of the chapter list and writes it as an EPUB to the
        /// path carried in <c>e.Argument</c>.
        /// </summary>
        /// <remarks>
        /// <see cref="ChapterLink"/> entries are downloaded through the matching source;
        /// <see cref="WebNovelChapter"/> entries are added as they are. Failures for a single
        /// chapter are reported through <c>WriteText</c> and do not stop the conversion.
        /// NOTE(review): the method is <c>async void</c>, so it returns to its caller at the first
        /// await - confirm the worker's completion event is not relied upon.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; <c>Argument</c> supplies the output path.</param>
        private async void convertBackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            var    items = new List <object>();
            string type  = string.Empty;
            string title = string.Empty;
            string cover = string.Empty;

            // Every control is read on the UI thread, including the title and cover text boxes.
            Invoke((MethodInvoker) delegate
            {
                type  = ((string)websiteTypeComboBox.SelectedItem).ToLower();
                title = titleTextBox.Text;
                cover = coverTextBox.Text;
                items.AddRange(chaptersListBox.Items.Cast <object>());
            });

            EBook book = new EBook
            {
                Title      = title,
                CoverImage = cover
            };

            foreach (object obj in items)
            {
                if (obj is ChapterLink)
                {
                    ChapterLink link = (ChapterLink)obj;

                    try
                    {
                        WebNovelSource  source  = GetSource(link.Url, type);
                        WebNovelChapter chapter = await source.GetChapterAsync(link);

                        if (chapter == null)
                        {
                            WriteText($"Failed to process {link.Name}!", Color.Red);
                        }
                        else
                        {
                            book.Chapters.Add(new Chapter {
                                Name = link.Name, Content = chapter.Content
                            });

                            WriteText($"{link.Name} has been processed.", Color.Green);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failure and carry on with the next chapter.
                        WriteText($"Failed to process {link.Name}!", Color.Red);
                        WriteText($"ERROR: {ex}", Color.Red);
                    }

                    // Optional pause between downloads, configured in seconds.
                    if (Settings.Default.DelayPerChapter > 0)
                    {
                        await Task.Delay(TimeSpan.FromSeconds(Settings.Default.DelayPerChapter));
                    }
                }
                else if (obj is WebNovelChapter)
                {
                    WebNovelChapter wn = (WebNovelChapter)obj;

                    book.Chapters.Add(new Chapter {
                        Name = wn.ChapterName, Content = wn.Content
                    });
                }
            }

            WriteText("Generating epub...");

            try
            {
                await book.GenerateEpubAsync(e.Argument.ToString());
            }
            catch (Exception ex)
            {
                WriteText("Error generating Epub", Color.Red);
                WriteText($"ERROR: {ex}", Color.Red);
            }

            WriteText("Done!", Color.Green);

            Invoke((MethodInvoker) delegate
            {
                progressBar.Visible   = false;
                convertButton.Enabled = true;
            });
        }
Example #19
0
        /// <summary>
        /// Retrieves chapters for the address typed by the user, either from a table of contents
        /// or by following "next chapter" links, and fills the list boxes with the result.
        /// </summary>
        /// <remarks>
        /// Any failure is reported through <c>WriteText</c>, and the progress bar and retrieve
        /// button are always restored, so an exception or an unrecognised mode cannot leave the
        /// form disabled or escape from this <c>async void</c> method.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private async void retrieveBackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            try
            {
                string type = string.Empty;
                string mode = string.Empty;
                string modeSelectedText = string.Empty;
                int amount = 0;

                // Control values must be read on the UI thread.
                Invoke((MethodInvoker)delegate
                {
                    type = ((string)websiteTypeComboBox.SelectedItem).ToLower();
                    mode = ((string)modeComboBox.SelectedItem).ToLower();
                    modeSelectedText = modeSelectedTextBox.Text;
                    amount = (int)amountNumericUpDown.Value;
                });

                // URI schemes are case-insensitive, so "HTTP://host" must not get a second prefix.
                bool hasScheme = modeSelectedText.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                 || modeSelectedText.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                if (!hasScheme)
                    modeSelectedText = "http://" + modeSelectedText;

                WebNovelSource source = GetSource(modeSelectedText, type);

                WebNovelInfo novelInfo = await source.GetNovelInfoAsync(modeSelectedText);

                if (mode == "table of contents")
                {
                    ChapterLink[] links = (await source.GetChapterLinksAsync(modeSelectedText)).ToArray();

                    Invoke((MethodInvoker)delegate
                    {
                        foreach (ChapterLink link in links)
                        {
                            if (link.Unknown)
                            {
                                unknownListBox.Items.Add(link);
                            }
                            else
                            {
                                chaptersListBox.Items.Add(link);
                            }
                        }

                        if (novelInfo != null)
                        {
                            if (!string.IsNullOrEmpty(novelInfo.CoverUrl))
                            {
                                try
                                {
                                    // Drop a protocol-relative "//" prefix so UriBuilder can
                                    // supply a scheme.
                                    string coverUrl = novelInfo.CoverUrl;
                                    coverUrl = coverUrl.StartsWith("//") ? coverUrl.Substring(2) : coverUrl;
                                    coverTextBox.Text = new UriBuilder(coverUrl).Uri.AbsoluteUri;
                                }
                                catch (UriFormatException) { }
                            }

                            if (!string.IsNullOrEmpty(novelInfo.Title))
                                titleTextBox.Text = novelInfo.Title;
                        }
                    });
                }
                else if (mode == "next chapter link")
                {
                    ChapterLink firstChapter = new ChapterLink { Url = modeSelectedText };
                    ChapterLink current = firstChapter;

                    int ctr = 1;
                    var chapters = new List<WebNovelChapter>();
                    while (true)
                    {
                        WebNovelChapter chapter;
                        try
                        {
                            chapter = await source.GetChapterAsync(current);
                        }
                        catch (HttpRequestException)
                        {
                            // A failed request ends the walk; chapters found so far are kept.
                            break;
                        }

                        if (chapter == null)
                            break;

                        if (string.IsNullOrEmpty(chapter.ChapterName))
                            chapter.ChapterName = current.Url;

                        chapters.Add(chapter);

                        WriteText($"Found Chapter {chapter.ChapterName}", Color.Green);

                        // Stop when there is no next link or when it points back at this chapter.
                        if (string.IsNullOrEmpty(chapter.NextChapterUrl) || chapter.Url == chapter.NextChapterUrl)
                            break;

                        current = new ChapterLink { Url = chapter.NextChapterUrl };

                        if (ctr == amount)
                            break;

                        ctr++;
                    }

                    Invoke((MethodInvoker)delegate
                    {
                        chaptersListBox.Items.AddRange(chapters.Cast<object>().ToArray());
                    });
                }
            }
            catch (Exception ex)
            {
                // An exception leaving an async void method cannot be observed by the caller;
                // report it in the log instead.
                WriteText($"ERROR: {ex}", Color.Red);
            }
            finally
            {
                Invoke((MethodInvoker)delegate
                {
                    progressBar.Visible = false;
                    retrieveButton.Enabled = true;
                });
            }
        }
 /// <summary>
 /// Default chapter retrieval; always throws so that sources opt in by overriding it.
 /// </summary>
 /// <param name="link">Link of the chapter to retrieve.</param>
 /// <param name="options">Retrieval options.</param>
 /// <param name="token">Cancellation token.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always, raised synchronously at the call.</exception>
 public virtual Task <WebNovelChapter> GetChapterAsync(
     ChapterLink link,
     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
     CancellationToken token         = default(CancellationToken))
 {
     throw new NotImplementedException();
 }
Example #21
0
        /// <summary>
        /// Downloads a wiki-style chapter page, makes every link inside <c>mw-content-text</c>
        /// absolute and swaps linked thumbnails for the full-size image found on the linked page.
        /// </summary>
        /// <param name="link">Link whose <c>Url</c> is downloaded and stored on the result.</param>
        /// <param name="options">Retrieval options; not read by this implementation.</param>
        /// <param name="token">Cancellation token forwarded to every download and parse call.</param>
        /// <returns>
        /// The chapter with the inner HTML of the content element, or <c>null</c> when the page has
        /// no element with id <c>mw-content-text</c>.
        /// </returns>
        public override async Task <WebNovelChapter> GetChapterAsync(ChapterLink link,
                                                                     ChapterRetrievalOptions options = default(ChapterRetrievalOptions),
                                                                     CancellationToken token         = default(CancellationToken))
        {
            string html = await GetWebPageAsync(link.Url, token);

            IHtmlDocument page = await Parser.ParseAsync(html, token);

            IElement body = page.GetElementById("mw-content-text");

            if (body == null)
            {
                return(null);
            }

            page.GetElementById("toc")?.Remove();

            RemoveTables(body);

            var anchors = body.Descendents <IElement>().Where(p => p.LocalName == "a");

            foreach (IElement anchor in anchors)
            {
                if (!anchor.HasAttribute("href"))
                {
                    continue;
                }

                // Rewrite the (HTML-decoded) href as an absolute address.
                string decodedHref = WebUtility.HtmlDecode(anchor.GetAttribute("href"));

                anchor.SetAttribute("href", UrlHelper.ToAbsoluteUrl(BaseUrl, decodedHref));

                IElement thumbnail = anchor.Descendents <IElement>().FirstOrDefault(p => p.LocalName == "img");

                if (thumbnail == null)
                {
                    continue;
                }

                // Keep only the sizing attributes; ToList() snapshots them so removal is safe.
                var disposable = thumbnail.Attributes
                                 .Where(p => p.LocalName != "width" && p.LocalName != "height")
                                 .ToList();

                foreach (var attribute in disposable)
                {
                    thumbnail.RemoveAttribute(attribute.Name);
                }

                // The anchor leads to an image description page that links the full-size file.
                string imagePageHtml = await GetWebPageAsync(anchor.GetAttribute("href"), token);

                IHtmlDocument imagePage = await Parser.ParseAsync(imagePageHtml, token);

                IElement fullImageAnchor = imagePage.Descendents <IElement>()
                                           .Where(e => e.LocalName == "div")
                                           .Where(e => e.HasAttribute("class"))
                                           .Where(e => e.GetAttribute("class") == "fullMedia")
                                           .Select(e => e.Descendents <IElement>().FirstOrDefault(p => p.LocalName == "a"))
                                           .FirstOrDefault();

                if (fullImageAnchor != null && fullImageAnchor.HasAttribute("href"))
                {
                    string fullImageUrl = UrlHelper.ToAbsoluteUrl(BaseUrl, fullImageAnchor.GetAttribute("href"));

                    thumbnail.SetAttribute("src", fullImageUrl);
                }
            }

            WebNovelChapter chapter = new WebNovelChapter();
            chapter.Url     = link.Url;
            chapter.Content = body.InnerHtml;

            return(chapter);
        }
 /// <summary>
 /// Retrieves the chapter that <paramref name="link"/> points to.
 /// </summary>
 /// <param name="link">Link identifying the chapter to retrieve.</param>
 /// <returns>A task producing the retrieved chapter.</returns>
 public abstract Task<WebNovelChapter> GetChapterAsync(ChapterLink link);
Example #23
0
        /// <summary>
        /// Handles the retrieve worker's <c>DoWork</c> event: reads the user's selections from the form,
        /// then either lists the chapter links found for a table-of-contents URL or walks a chain of
        /// "next chapter" links, adding the results to the form's list boxes.
        /// </summary>
        /// <remarks>
        /// This handler is <c>async void</c>, so it returns to its caller at the first <c>await</c> and
        /// the retrieval continues afterwards.
        /// NOTE(review): if this is attached to a BackgroundWorker, its completion event will presumably
        /// fire before the retrieval has finished; confirm nothing depends on that event.
        /// </remarks>
        /// <param name="sender">The object that raised the event (unused).</param>
        /// <param name="e">The event data (unused).</param>
        private async void retrieveBackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            string type             = string.Empty;
            string mode             = string.Empty;
            string modeSelectedText = string.Empty;
            int    amount           = 0;

            // Control state is read through Invoke so the access happens on the thread that owns the controls.
            // The keys are lower-cased with the invariant culture because they are compared against fixed
            // identifiers below, not shown to the user.
            Invoke((MethodInvoker) delegate
            {
                type             = ((string)websiteTypeComboBox.SelectedItem).ToLowerInvariant();
                mode             = ((string)modeComboBox.SelectedItem).ToLowerInvariant();
                modeSelectedText = modeSelectedTextBox.Text;
                amount           = (int)amountNumericUpDown.Value;
            });

            // URL schemes are case-insensitive, so "HTTP://host" must not receive a second prefix.
            bool hasHttpScheme = modeSelectedText.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                                 modeSelectedText.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

            if (!hasHttpScheme)
            {
                modeSelectedText = "http://" + modeSelectedText;
            }

            try
            {
                WebNovelSource source = GetSource(modeSelectedText, type);

                WebNovelInfo novelInfo = await source.GetNovelInfoAsync(modeSelectedText);

                if (mode == "table of contents")
                {
                    ChapterLink[] links = (await source.GetChapterLinksAsync(modeSelectedText)).ToArray();

                    Invoke((MethodInvoker) delegate
                    {
                        foreach (ChapterLink link in links)
                        {
                            if (link.Unknown)
                            {
                                unknownListBox.Items.Add(link);
                            }
                            else
                            {
                                chaptersListBox.Items.Add(link);
                            }
                        }

                        if (novelInfo != null)
                        {
                            if (!string.IsNullOrEmpty(novelInfo.CoverUrl))
                            {
                                try
                                {
                                    // Strip a protocol-relative "//" prefix before handing the value to UriBuilder.
                                    string coverUrl   = novelInfo.CoverUrl;
                                    coverUrl          = coverUrl.StartsWith("//", StringComparison.Ordinal) ? coverUrl.Substring(2) : coverUrl;
                                    coverTextBox.Text = new UriBuilder(coverUrl).Uri.AbsoluteUri;
                                }
                                catch (UriFormatException)
                                {
                                    // Best effort: a malformed cover URL simply leaves the cover box unchanged.
                                }
                            }

                            if (!string.IsNullOrEmpty(novelInfo.Title))
                            {
                                titleTextBox.Text = novelInfo.Title;
                            }
                        }
                    });
                }
                else if (mode == "next chapter link")
                {
                    ChapterLink current = new ChapterLink {
                        Url = modeSelectedText
                    };

                    int ctr      = 1;
                    var chapters = new List <WebNovelChapter>();
                    while (true)
                    {
                        WebNovelChapter chapter;
                        try
                        {
                            chapter = await source.GetChapterAsync(current);
                        }
                        catch (HttpRequestException)
                        {
                            // A failed request ends the walk; chapters found so far are still listed.
                            break;
                        }

                        if (chapter == null)
                        {
                            break;
                        }

                        if (string.IsNullOrEmpty(chapter.ChapterName))
                        {
                            chapter.ChapterName = current.Url;
                        }

                        chapters.Add(chapter);

                        WriteText($"Found Chapter {chapter.ChapterName}", Color.Green);

                        // Stop when there is no next link or the chapter links back to itself.
                        if (string.IsNullOrEmpty(chapter.NextChapterUrl) || chapter.Url == chapter.NextChapterUrl)
                        {
                            break;
                        }

                        current = new ChapterLink {
                            Url = chapter.NextChapterUrl
                        };

                        // ctr starts at 1, so an amount of 0 never matches and the walk runs until the chain ends.
                        if (ctr == amount)
                        {
                            break;
                        }

                        ctr++;
                    }

                    Invoke((MethodInvoker) delegate
                    {
                        chaptersListBox.Items.AddRange(chapters.Cast <object>().ToArray());
                    });
                }
            }
            catch (HttpRequestException ex)
            {
                // Failures while fetching the novel info or chapter list are reported instead of escaping
                // this async void handler unobserved.
                WriteText($"Failed to retrieve data: {ex.Message}", Color.Red);
            }
            finally
            {
                // Always hand control back to the user, including for an unrecognised mode or a failure.
                Invoke((MethodInvoker) delegate
                {
                    progressBar.Visible    = false;
                    retrieveButton.Enabled = true;
                });
            }
        }