/// <summary>
/// Scrapes a user's profile galleries page: derives the user name from the URL, loads the
/// page and hands every category link found on it back to <paramref name="scrapingService"/>.
/// </summary>
/// <param name="scrapingService">Service that receives each discovered category URL.</param>
/// <param name="url">Page URL; must contain <c>profile/&lt;user&gt;/galleries</c>.</param>
/// <param name="userName">Ignored on input; it is overwritten with the name parsed from <paramref name="url"/>.</param>
/// <param name="categoryName">Not used by this method.</param>
/// <param name="galleryName">Not used by this method.</param>
/// <exception cref="ArgumentException">
/// <paramref name="url"/> does not contain the <c>profile/</c> marker followed by <c>/galleries</c>.
/// </exception>
public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
{
    const string profileMarker = "profile/";
    const string galleriesMarker = "/galleries";

    // Locate the user name between "profile/" and "/galleries". Both markers are validated
    // so that a malformed URL fails with a clear message instead of yielding a garbage name
    // or an opaque Substring range error.
    var markerIndex = url.IndexOf(profileMarker, StringComparison.Ordinal);
    var startIndex = markerIndex + profileMarker.Length;
    var endIndex = markerIndex < 0
        ? -1
        : url.IndexOf(galleriesMarker, startIndex, StringComparison.Ordinal);
    if (endIndex < 0)
    {
        throw new ArgumentException(
            $"Cannot extract the user name: expected '{profileMarker}<user>{galleriesMarker}' in '{url}'.",
            nameof(url));
    }

    userName = url.Substring(startIndex, endIndex - startIndex);
    Console.WriteLine($"USER {userName}");

    HtmlDocument htmlDoc = OpenDocument(url);

    // SelectNodes yields null (not an empty collection) when no node matches.
    IEnumerable<HtmlNode> galleries = htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");
    if (galleries == null)
    {
        return;
    }

    foreach (var node in galleries)
    {
        // Skip the navigation/toggle links that share the same CSS class.
        var markup = node.InnerHtml;
        if (markup.Contains("<b>Overview</b>") || markup.Contains("[Show All]") || markup.Contains("[Hide All]"))
        {
            continue;
        }

        // An anchor without an href has nothing to follow.
        var href = node.Attributes["href"]?.Value;
        if (href == null)
        {
            continue;
        }

        var responseUrl = GetResponseUrl(href);
        scrapingService.ScrapeUrl(responseUrl, userName, node.InnerText, null);
    }
}
/// <summary>
/// Walks an organizer (category) page and hands every distinct gallery link found on it
/// to <paramref name="scrapingService"/>, requesting each gallery with <c>?view=2</c>.
/// </summary>
/// <param name="scrapingService">Service that receives each discovered gallery URL.</param>
/// <param name="userName">User name forwarded unchanged to the service.</param>
/// <param name="categoryName">Category name forwarded unchanged to the service.</param>
/// <param name="htmlDoc">Already loaded organizer page.</param>
private void ScrapOrganizerPage(ScrapingService scrapingService, string userName, string categoryName, HtmlDocument htmlDoc)
{
    Console.WriteLine($"ORGANIZER {userName} / {categoryName}");

    // SelectNodes yields null (not an empty collection) when no node matches.
    IEnumerable<HtmlNode> galleries = htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");
    if (galleries == null)
    {
        return;
    }

    // Set membership keeps duplicate detection O(1) per link.
    var processedUrls = new HashSet<string>();

    foreach (var node in galleries)
    {
        // Skip the navigation/toggle links that share the same CSS class.
        var markup = node.InnerHtml;
        if (markup.Contains("Overview") || markup.Contains("[Show All]") || markup.Contains("[Hide All]"))
        {
            continue;
        }

        // Only site-relative gallery links are followed; anchors without an href are skipped.
        var href = node.Attributes["href"]?.Value;
        if (href == null || !href.StartsWith("/gallery/", StringComparison.Ordinal))
        {
            continue;
        }

        // Add returns false when the link was already handled on this page.
        if (!processedUrls.Add(href))
        {
            continue;
        }

        // NOTE(review): href always starts with "/gallery/" here, so the BASE_URL branch looks
        // unreachable and the fallback yields a double slash after BASE_URL. Kept as-is because
        // BASE_URL and GetResponseUrl are defined elsewhere - confirm before simplifying.
        var responseUrl = href.StartsWith(BASE_URL, StringComparison.Ordinal)
            ? GetResponseUrl(href)
            : GetResponseUrl(BASE_URL + "/" + href);

        // Strip layout whitespace from the link text to obtain the gallery name.
        var galleryName = node.InnerText.Replace("\t", string.Empty).Replace("\n", string.Empty);

        scrapingService.ScrapeUrl(responseUrl + "?view=2", userName, categoryName, galleryName);
    }
}