/// <summary>
/// Reads the robots.txt of <paramref name="url"/>, loads every sitemap it references
/// (following sitemap indexes as they are encountered) and passes the absolute URI of
/// each sitemap item to <c>_linkStorage</c>.
/// </summary>
/// <param name="url">Address used to build the <see cref="Uri"/> handed to the robots.txt loader.</param>
/// <remarks>
/// Each asynchronous loader call is consumed synchronously through <c>.Result</c>.
/// </remarks>
private void LoadRobots(string url)
{
    var robots = _loader.LoadFromRobotsTxtAsync(new Uri(url)).Result;
    if (robots.SitemapType != SitemapType.RobotsTxt)
    {
        return;
    }

    // Load all sitemaps listed in robots.txt, skipping locations that were already recorded.
    foreach (var listed in robots.Sitemaps)
    {
        var location = listed.SitemapLocation;
        if (!_sitemapUris.Any(known => known == location))
        {
            _sitemapUris.Add(location);
            _loadedSitemaps.Add(_loader.LoadAsync(location).Result);
        }
    }

    // _loadedSitemaps doubles as a work queue: index sitemaps append their children to it
    // while it is being walked, so the bound is re-read from Count on every pass.
    for (var position = 0; position < _loadedSitemaps.Count; position++)
    {
        var current = _loadedSitemaps[position];
        switch (current.SitemapType)
        {
            case SitemapType.Items:
                // An earlier per-location duplicate check was disabled here; every item
                // location is handed to _linkStorage.TryAdd unconditionally.
                foreach (var entry in current.Items)
                {
                    _linkStorage.TryAdd(entry.Location.AbsoluteUri);
                }
                break;

            case SitemapType.Index:
                foreach (var child in current.Sitemaps)
                {
                    var childLocation = child.SitemapLocation;
                    if (!_sitemapUris.Any(known => known == childLocation))
                    {
                        _sitemapUris.Add(childLocation);
                        _loadedSitemaps.Add(_loader.LoadAsync(childLocation).Result);
                    }
                }
                break;
        }
    }
}
/// <summary>
/// Example flow: load the sitemap list from a site's robots.txt, then load the first
/// referenced sitemap and check that it carries either child sitemaps or content items.
/// </summary>
public async Task RobotsTxtExample()
{
    var sitemapLoader = new SitemapLoader();
    Sitemap robotsTxt = await sitemapLoader.LoadFromRobotsTxtAsync(new Uri("https://www.google.com"));

    Assert.Equal(SitemapType.RobotsTxt, robotsTxt.SitemapType);
    // At least some sitemaps are expected in the list.
    Assert.NotEmpty(robotsTxt.Sitemaps);
    // robots.txt can only link to sitemaps, never to content items.
    Assert.Empty(robotsTxt.Items);

    // At this point only the sitemap's location is known; its contents are neither loaded nor parsed.
    Sitemap pendingSitemap = robotsTxt.Sitemaps.First();
    Assert.False(pendingSitemap.IsLoaded);

    // After loading, the contents are available.
    var firstLoadedSitemap = await sitemapLoader.LoadAsync(pendingSitemap);
    Assert.True(firstLoadedSitemap.IsLoaded);

    // The loaded sitemap is either an index (links to other sitemaps)
    // or an item sitemap (links to actual content); anything else is unexpected.
    var loadedType = firstLoadedSitemap.SitemapType;
    if (loadedType == SitemapType.Index)
    {
        Assert.NotEmpty(firstLoadedSitemap.Sitemaps);
    }
    else if (loadedType == SitemapType.Items)
    {
        Assert.NotEmpty(firstLoadedSitemap.Items);
    }
    else
    {
        throw new NotSupportedException($"SitemapType {firstLoadedSitemap.SitemapType} not expected here");
    }
}
/// <summary>
/// Loads the sitemap at <paramref name="sitemapLocation"/> through the wrapped loader
/// and wraps the result in a <c>RobotsSitemap</c>.
/// </summary>
/// <param name="sitemapLocation">Location passed unchanged to the wrapped loader's <c>LoadAsync</c>.</param>
/// <returns>A new <c>RobotsSitemap</c> constructed from the loaded sitemap.</returns>
/// <remarks>
/// The asynchronous load is consumed synchronously through <c>.Result</c>.
/// </remarks>
public IRobotsSitemap Load(Uri sitemapLocation)
{
    var loadedSitemap = _adapteeObject.LoadAsync(sitemapLocation).Result;
    return new RobotsSitemap(loadedSitemap);
}