예제 #1
0
        /// <summary>
        /// Walkthrough: load a site's robots.txt, verify it only links to sitemaps,
        /// then load the first linked sitemap and verify it contains entries of the
        /// kind its type announces.
        /// </summary>
        public async Task RobotsTxtExample()
        {
            var sitemapLoader = new SitemapLoader();
            var siteRoot = new Uri("https://www.google.com");

            // Phase 1: the robots.txt result itself.
            Sitemap robots = await sitemapLoader.LoadFromRobotsTxtAsync(siteRoot);

            Assert.Equal(SitemapType.RobotsTxt, robots.SitemapType);
            // At least one sitemap link is expected in the list.
            Assert.NotEmpty(robots.Sitemaps);
            // A robots.txt can only link to sitemaps, never to content items.
            Assert.Empty(robots.Items);

            // Phase 2: a linked sitemap before loading - only its location is known,
            // its contents have been neither fetched nor parsed.
            Sitemap pending = robots.Sitemaps.First();
            Assert.False(pending.IsLoaded);

            // Phase 3: the same sitemap after an explicit load.
            var loaded = await sitemapLoader.LoadAsync(pending);
            Assert.True(loaded.IsLoaded);

            // A loaded sitemap either links to further sitemaps (an index sitemap)
            // or to actual content items, so the type decides which list to check.
            var loadedType = loaded.SitemapType;
            if (loadedType == SitemapType.Index)
            {
                Assert.NotEmpty(loaded.Sitemaps);
            }
            else if (loadedType == SitemapType.Items)
            {
                Assert.NotEmpty(loaded.Items);
            }
            else
            {
                throw new NotSupportedException($"SitemapType {loadedType} not expected here");
            }
        }
예제 #2
0
        /// <summary>
        /// Loads the robots.txt for <paramref name="url"/>, then loads every sitemap
        /// reachable from it and passes the absolute URI of each content item to
        /// <c>_linkStorage.TryAdd</c>.
        /// </summary>
        /// <param name="url">String handed to the <see cref="Uri"/> constructor and then to the loader.</param>
        /// <remarks>
        /// Every load is awaited synchronously through <c>.Result</c>, so the calling
        /// thread is blocked for the duration of each request. Returns without doing
        /// anything further when the loaded result is not of type
        /// <c>SitemapType.RobotsTxt</c>.
        /// </remarks>
        private void LoadRobots(string url)
        {
            var robots = _loader.LoadFromRobotsTxtAsync(new Uri(url)).Result;

            if (robots.SitemapType != SitemapType.RobotsTxt)
            {
                return;
            }

            // Load all sitemaps listed in robots.txt, skipping locations already recorded.
            foreach (var linked in robots.Sitemaps)
            {
                var location = linked.SitemapLocation;
                if (!_sitemapUris.Any(known => known == location))
                {
                    _sitemapUris.Add(location);
                    _loadedSitemaps.Add(_loader.LoadAsync(location).Result);
                }
            }

            // Walk _loadedSitemaps from its first element. The bound is re-read on every
            // pass, so sitemaps appended while handling an index are processed as well.
            for (var index = 0; index < _loadedSitemaps.Count; index++)
            {
                var current = _loadedSitemaps[index];

                switch (current.SitemapType)
                {
                    case SitemapType.Items:
                        // Content sitemap: record the absolute URI of every item.
                        foreach (var entry in current.Items)
                        {
                            _linkStorage.TryAdd(entry.Location.AbsoluteUri);
                        }
                        break;

                    case SitemapType.Index:
                        // Index sitemap: load each child not seen before and queue it
                        // at the end of the list for a later pass of this loop.
                        foreach (var child in current.Sitemaps)
                        {
                            var childLocation = child.SitemapLocation;
                            if (!_sitemapUris.Any(known => known == childLocation))
                            {
                                _sitemapUris.Add(childLocation);
                                _loadedSitemaps.Add(_loader.LoadAsync(childLocation).Result);
                            }
                        }
                        break;
                }
            }
        }