Exemple #1
0
        /// <summary>
        /// Recursively crawls a page: fetches its HTML, records the image URLs found on it that
        /// have not been recorded before, then crawls every not-yet-visited anchor one level deeper.
        /// </summary>
        /// <param name="url">
        /// Address of the page to fetch. It is also passed, together with each link found on the
        /// page, to <c>ResourceUtility.GetValidResourceString</c>.
        /// </param>
        /// <param name="currentLevel">Depth of this call; nested calls receive <c>currentLevel + 1</c>.</param>
        /// <param name="maxLevel">
        /// Exclusive depth limit: the method returns immediately, without marking the URL as visited,
        /// when <paramref name="currentLevel"/> is greater than or equal to this value.
        /// </param>
        private void Crawl(string url, int currentLevel, int maxLevel)
        {
            if (currentLevel >= maxLevel)
            {
                return;
            }

            this.visitedUrls.Add(url);

            string html;

            try
            {
                html = this.htmlFetcher.Fetch(url);
            }
            catch (WebException)
            {
                // Best effort: a page that cannot be fetched is skipped and the crawl continues elsewhere.
                return;
            }

            // Materialize exactly once. The query is deferred, so handing it to AddRange and then
            // iterating it again would run the whole pipeline twice; and because downloadedImgs is
            // only updated afterwards, an image repeated on the same page would be stored more than
            // once without Distinct().
            var newImageUrls = this.htmlParser.ParseImages(html)
                               .Where(u => ResourceUtility.IsValidImage(u))
                               .Select(u => ResourceUtility.GetValidResourceString(url, u))
                               .Where(u => !this.downloadedImgs.Contains(u))
                               .Distinct()
                               .ToList();

            this.urlRepository.AddRange(newImageUrls);
            foreach (var imageUrl in newImageUrls)
            {
                this.downloadedImgs.Add(imageUrl);
            }

            var anchorUrls = this.htmlParser.ParseAnchors(html)
                             .Select(u => ResourceUtility.GetValidResourceString(url, u));

            foreach (var anchorUrl in anchorUrls)
            {
                // Checked at iteration time on purpose: recursive calls made earlier in this loop
                // add to visitedUrls, and those pages must not be crawled again.
                if (this.visitedUrls.Contains(anchorUrl))
                {
                    continue;
                }

                this.Crawl(anchorUrl, currentLevel + 1, maxLevel);
            }
        }