/// <summary>
/// Loads the page behind each item in <paramref name="items"/> and appends one child
/// <c>CrawlerItem</c> per element matching <paramref name="selector"/> to that item's
/// <c>Items</c> collection (creating the collection when it is <c>null</c>).
/// </summary>
/// <param name="items">Items whose <c>Url</c> is opened; may be <c>null</c>.</param>
/// <param name="selector">CSS selector handed to <c>QuerySelectorAll</c>.</param>
/// <returns>
/// The same <paramref name="items"/> instance after it has been filled, or <c>null</c>
/// when <paramref name="items"/> was <c>null</c>.
/// </returns>
public async Task <CrawlerItemCollection> GetLevelLinks(CrawlerItemCollection items, string selector)
        {
            if (items == null)
            {
                return null;
            }

            // The loader configuration does not depend on the current item, so build it once.
            var config = Configuration.Default.WithDefaultLoader();

            for (int i = 0; i < items.Count; i++)
            {
                var current = items[i];

                if (current.Items == null)
                {
                    current.Items = new CrawlerItemCollection();
                }

                // Only plain strings are copied out of the document, so it can be released
                // as soon as the matches have been read instead of lingering per crawled page.
                using (var document = await BrowsingContext.New(config).OpenAsync(current.Url))
                {
                    foreach (var element in document.QuerySelectorAll(selector))
                    {
                        // The raw "href" attribute is stored as-is; no check is made here
                        // that the matched element actually carries one.
                        current.Items.Add(new CrawlerItem(element.TextContent, element.GetAttribute("href")));
                    }
                }
            }

            return items;
        }
        /// <summary>
        /// Initializes the crawler with its start URL and depth level and creates an empty
        /// item collection.
        /// </summary>
        /// <param name="startUrl">Start URL; must be neither <c>null</c> nor empty.</param>
        /// <param name="deepLevel">Depth level; must be at least 1. Defaults to 1.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="startUrl"/> is <c>null</c> or empty, or <paramref name="deepLevel"/> is below 1.
        /// </exception>
        public Crawler(string startUrl, int deepLevel = 1)
        {
            bool urlProvided = !string.IsNullOrEmpty(startUrl);
            bool depthValid  = deepLevel >= 1;

            // Both arguments must be valid; either failure is reported with the same message.
            if (!(urlProvided && depthValid))
            {
                throw new ArgumentException("Некорректные параметры");
            }

            _items     = new CrawlerItemCollection();
            _deepLevwl = deepLevel;
            _startUrl  = startUrl;
        }
// Example #3
// 0
        /// <summary>
        /// Appends to the internal list every element of <paramref name="items"/> for which
        /// <c>IsContains</c> reports <c>false</c>; elements already reported as contained are skipped.
        /// </summary>
        /// <param name="items">Source collection; must not be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="items"/> is <c>null</c>.</exception>
        public void AddRange(CrawlerItemCollection items)
        {
            if (items == null)
            {
                // ArgumentNullException derives from ArgumentException, so callers that
                // catch ArgumentException keep working while getting the parameter name.
                throw new ArgumentNullException("items");
            }

            for (int i = 0; i < items.Count; i++)
            {
                var candidate = items[i];

                // The check runs against the current state on every iteration, i.e. after
                // any element added earlier in this same call.
                if (!IsContains(candidate))
                {
                    _list.Add(candidate);
                }
            }
        }
        /// <summary>
        /// Builds a new result collection containing a single root item for
        /// <paramref name="link"/> and fills it through <c>GetLevelLinks</c>: first the root
        /// page is scanned with two selectors, then every resulting first-level item is
        /// scanned for all anchors.
        /// </summary>
        /// <param name="items">
        /// Not read: a new collection is always created, so whatever the caller passes is ignored.
        /// </param>
        /// <param name="link">URL stored on the root item and opened first.</param>
        /// <param name="deepLevel">Currently unused by this method.</param>
        /// <returns>The newly created collection holding the root item and its collected children.</returns>
        public async Task <CrawlerItemCollection> GetCrawlerResults(CrawlerItemCollection items, string link, int deepLevel)
        {
            // Use a local instead of overwriting the parameter, which made it look like an input.
            var root = new CrawlerItemCollection();
            root.Add(new CrawlerItem("Справочник ссузов", link));

            // First level: both selectors append their matches to root[0].Items.
            // The return values are not needed; only the appended children are used.
            await GetLevelLinks(root, "h2.edn_articleTitle>a");
            await GetLevelLinks(root, "a.page");

            // Second level: collect every anchor from each first-level page.
            // (An extra pass re-applying the two first-level selectors to these pages
            // existed only as commented-out code and is not performed.)
            await GetLevelLinks(root[0].Items, "a");

            return root;
        }