Exemple #1
0
        /// <summary>
        /// Scrapes a user's gallery overview page: derives the user name from
        /// <paramref name="url"/>, loads the page, and forwards every link with CSS class
        /// <c>blk_galleries</c> (except the navigation/toggle links) to
        /// <paramref name="scrapingService"/>.
        /// </summary>
        /// <param name="scrapingService">Service that receives each resolved link for further scraping.</param>
        /// <param name="url">Page URL; must contain a <c>profile/{user}/galleries</c> segment.</param>
        /// <param name="userName">Incoming value is ignored; the name is always re-derived from <paramref name="url"/>.</param>
        /// <param name="categoryName">Not used by this method.</param>
        /// <param name="galleryName">Not used by this method.</param>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="url"/> does not contain <c>profile/</c> followed by <c>/galleries</c>.
        /// </exception>
        public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
        {
            const string profileMarker   = "profile/";
            const string galleriesMarker = "/galleries";

            if (url == null)
            {
                throw new ArgumentNullException(nameof(url));
            }

            // Ordinal search: URLs are not linguistic text, so culture rules must not apply.
            var markerIndex = url.IndexOf(profileMarker, StringComparison.Ordinal);
            var startIndex  = markerIndex < 0 ? -1 : markerIndex + profileMarker.Length;

            // Look for the terminator only after the profile marker so the length can never be negative.
            var endIndex = startIndex < 0 ? -1 : url.IndexOf(galleriesMarker, startIndex, StringComparison.Ordinal);

            if (endIndex < 0)
            {
                throw new ArgumentException(
                    $"Expected a URL containing '{profileMarker}<user>{galleriesMarker}' but got '{url}'.",
                    nameof(url));
            }

            userName = url.Substring(startIndex, endIndex - startIndex);

            Console.WriteLine($"USER {userName}");

            HtmlDocument htmlDoc = OpenDocument(url);

            var galleries = (IEnumerable <HtmlNode>)htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (galleries == null)
            {
                return;
            }

            foreach (var node in galleries)
            {
                // Skip the overview link and the show/hide toggles; they are not galleries.
                if (node.InnerHtml.Contains("<b>Overview</b>") ||
                    node.InnerHtml.Contains("[Show All]") ||
                    node.InnerHtml.Contains("[Hide All]"))
                {
                    continue;
                }

                // An anchor without an href cannot be followed.
                var href = node.Attributes["href"]?.Value;

                if (href == null)
                {
                    continue;
                }

                var responseUrl = GetResponseUrl(href);

                // The link text is passed on in the category position; no gallery name is known yet.
                scrapingService.ScrapeUrl(responseUrl, userName, node.InnerText, null);
            }
        }
        /// <summary>
        /// Walks an already-loaded organizer page and forwards each distinct link with CSS class
        /// <c>blk_galleries</c> whose href starts with <c>/gallery/</c> to
        /// <paramref name="scrapingService"/>, appending <c>?view=2</c> to the resolved URL.
        /// </summary>
        /// <param name="scrapingService">Service that receives each resolved gallery URL.</param>
        /// <param name="userName">User name passed through unchanged to the service.</param>
        /// <param name="categoryName">Category name passed through unchanged to the service.</param>
        /// <param name="htmlDoc">Parsed HTML of the organizer page.</param>
        private void ScrapOrganizerPage(ScrapingService scrapingService, string userName, string categoryName, HtmlDocument htmlDoc)
        {
            Console.WriteLine($"ORGANIZER {userName} / {categoryName}");

            var galleries = (IEnumerable <HtmlNode>)htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (galleries == null)
            {
                return;
            }

            // Set membership keeps the duplicate check O(1) per link.
            var processedUrls = new HashSet <string>(StringComparer.Ordinal);

            foreach (var node in galleries)
            {
                // Skip the overview link and the show/hide toggles; they are not galleries.
                if (node.InnerHtml.Contains("Overview") ||
                    node.InnerHtml.Contains("[Show All]") ||
                    node.InnerHtml.Contains("[Hide All]"))
                {
                    continue;
                }

                // Read the href once; anchors without one, or pointing outside /gallery/, are ignored.
                var href = node.Attributes["href"]?.Value;

                if (href == null || !href.StartsWith("/gallery/", StringComparison.Ordinal))
                {
                    continue;
                }

                // Add returns false when the href was already handled.
                if (!processedUrls.Add(href))
                {
                    continue;
                }

                // NOTE(review): href always starts with "/gallery/" here, so the first branch is only
                // taken if BASE_URL is itself a prefix of such a path, and the second branch yields
                // BASE_URL + "//gallery/..." (double slash). Kept as-is; confirm against BASE_URL.
                var responseUrl = href.StartsWith(BASE_URL, StringComparison.Ordinal) ?
                                  GetResponseUrl(href) :
                                  GetResponseUrl(BASE_URL + "/" + href);

                // Strip layout tabs/newlines from the link text before using it as the gallery name.
                var galleryTitle = node.InnerText.Replace("\t", string.Empty).Replace("\n", string.Empty);

                scrapingService.ScrapeUrl(responseUrl + "?view=2", userName, categoryName, galleryTitle);
            }
        }