/// <summary>
/// Creates a page for the given URL and determines the folder path its downloads belong to.
/// </summary>
/// <param name="url">Address stored in <c>Url</c>.</param>
/// <param name="downLoadFolderName">
/// Folder name for this page. It is appended to the parent's <c>DownloadFolderPath</c>
/// when a parent is supplied, otherwise it is used as the path on its own.
/// </param>
/// <param name="hapScraper">Scraper instance stored in <c>_hapScraper</c>.</param>
/// <param name="parentPage">Optional parent page; <c>null</c> for a page without a parent.</param>
/// <exception cref="NotImplementedException">
/// Thrown when <paramref name="downLoadFolderName"/> is null, empty or whitespace.
/// </exception>
public PagalWorldPage(string url, string downLoadFolderName, HapScraper hapScraper, PagalWorldPage parentPage = null)
{
    _hapScraper = hapScraper;
    Url = url;

    // NOTE(review): ArgumentException would be the conventional type here; the
    // existing exception type is kept so that current callers are unaffected.
    if (string.IsNullOrWhiteSpace(downLoadFolderName))
    {
        throw new NotImplementedException("download folder name is neccessary!");
    }

    if (parentPage == null)
    {
        // No parent: the folder name is the whole path.
        DownloadFolderPath = downLoadFolderName;
    }
    else
    {
        ParentPage = parentPage;
        // NOTE(review): the separator is a hard-coded backslash.
        DownloadFolderPath = parentPage.DownloadFolderPath + "\\" + downLoadFolderName;
    }

    // Create the document and the collections only when they are still null.
    if (document == null)
    {
        document = new Hap.HtmlDocument();
    }

    if (MidiUrls == null)
    {
        MidiUrls = new List<string>();
    }

    if (ChildPages == null)
    {
        ChildPages = new List<PagalWorldPage>();
    }
}
/// <summary>
/// Collects the links listed under the <c>genreMoreList</c> div of the loaded document,
/// creates a child <c>PagalWorldPage</c> for each one, extracts its contents and adds it
/// to <c>ChildPages</c>. When no links are found, <c>HasChildren</c> is set to true.
/// </summary>
private void extractAllPages()
{
    List<Page> pageUrls = new List<Page>();

    // Only the first div with id "genreMoreList" is considered.
    var container = document.DocumentNode.Descendants("div")
        .FirstOrDefault(div => div.Id == "genreMoreList");

    if (container != null)
    {
        // Only the first <ul> whose class attribute contains "img-link" is considered.
        // FirstOrDefault avoids the out-of-range exception the previous
        // Take(1).ToList()[0] raised when the div had no such list.
        var linkList = container.Descendants("ul")
            .FirstOrDefault(ul => ul.Attributes.Contains("class")
                && ul.Attributes["class"].Value.Contains("img-link"));

        if (linkList != null)
        {
            // Visit every <a> under every <li>, in document order per <li>.
            foreach (var anchor in linkList.Descendants("li").SelectMany(li => li.Descendants("a")))
            {
                // Anchors without an href carry no target and are skipped.
                if (!anchor.Attributes.Contains("href"))
                {
                    continue;
                }

                Page link = new Page();
                link.Url = anchor.Attributes["href"].Value;
                link.Title = anchor.InnerText;
                pageUrls.Add(link);
            }
        }
    }

    if (pageUrls.Count > 0)
    {
        foreach (Page link in pageUrls)
        {
            // The link text becomes the child's download folder name.
            PagalWorldPage childPage = new PagalWorldPage(link.Url, link.Title, _hapScraper, this);

            // extract contents of child pages
            childPage.ExtractContents();
            this.ChildPages.Add(childPage);
        }
    }
    else
    {
        // NOTE(review): the flag is set to true exactly when no child links were
        // found, which reads as inverted. Left unchanged because the intended
        // meaning of HasChildren is not visible from this method - confirm.
        this.HasChildren = true;
    }
}
/// <summary>
/// Returns the first entry of <c>ChildPages</c> whose <c>HasChildren</c> is not true.
/// </summary>
/// <returns>The first matching child page, or <c>null</c> when no child matches.</returns>
private PagalWorldPage getNextChildPage()
{
    // The previous Select(...).Take(1) produced a sequence of booleans, and casting
    // that sequence to PagalWorldPage failed at runtime with InvalidCastException.
    // FirstOrDefault with the same predicate returns the page itself.
    return this.ChildPages.FirstOrDefault(p => p.HasChildren != true);
}
/// <summary>
/// Appends the given page to <c>ChildPages</c>.
/// </summary>
/// <param name="childPage">Page to add; it is stored as passed, without validation.</param>
public void AddChildPage(PagalWorldPage childPage)
{
    ChildPages.Add(childPage);
}
/// <summary>
/// Builds a page for <c>MasterUrl</c> with the download folder name "ExtractedMidis",
/// passing this instance as its scraper, and extracts that page's contents.
/// </summary>
public void Run()
{
    // No parent page is supplied, so the constructor's default (null) applies.
    new PagalWorldPage(MasterUrl, "ExtractedMidis", this).ExtractContents();
}