/// <summary>
/// Downloads the resource at <c>wa.Url + rightPart</c> and records every
/// sitemap reference found in it on <c>wa.Sitemaps</c>.
/// </summary>
/// <remarks>
/// A line is treated as a sitemap reference when it contains the text
/// "Sitemap" and a ':' separator; everything after the first ':' is trimmed
/// and passed through <c>UrlFormater.GetRightPart</c>.
/// If the response is empty, <c>wa.Sitemaps</c> is left untouched.
/// <c>wa.isSuccess</c> is set to <c>true</c> only when at least one sitemap
/// reference was collected.
/// The lookup is best-effort: any failure while downloading or reading is
/// swallowed and the method simply returns.
/// </remarks>
/// <param name="rightPart">
/// Path appended to <c>wa.Url</c> to form the address to download
/// (presumably something like "/robots.txt" - confirm against callers).
/// </param>
public void FindIn(string rightPart)
{
    try
    {
        // The client, the response stream and the reader all hold resources
        // (the stream keeps the connection open), so dispose them deterministically.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(this.wa.Url + rightPart))
        using (StreamReader reader = new StreamReader(stream))
        {
            if (reader.EndOfStream)
            {
                return;
            }

            this.wa.Sitemaps = new List<Sitemap>();
            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();
                if (!line.Contains("Sitemap"))
                {
                    continue;
                }

                // A line mentioning "Sitemap" without a ':' carries no link.
                // Skip it instead of throwing, which used to abort the whole
                // scan and discard the success flag for entries already found.
                int separator = line.IndexOf(':');
                if (separator < 0)
                {
                    continue;
                }

                string link = line.Substring(separator + 1).Trim();
                this.wa.Sitemaps.Add(new Sitemap { SitemapLink = UrlFormater.GetRightPart(link) });
            }

            if (this.wa.Sitemaps.Count > 0)
            {
                this.wa.isSuccess = true;
            }
        }
    }
    catch
    {
        // Deliberately best-effort: an unreachable host, a missing file or a
        // read error just means no sitemaps were discovered from this source.
    }
}
/// <summary>
/// Loads every sitemap listed in <paramref name="wa"/> and fills in the pages
/// it references, expanding nested sitemap files along the way.
/// </summary>
/// <remarks>
/// For each entry the document at <c>wa.Url + sitemap.SitemapLink</c> is opened
/// and the text of every element without child elements is collected, keeping
/// only texts that contain <c>UrlFormater.GetLeftPartWOHttp(wa.Url)</c>.
/// If the first collected link ends with ".xml", all links are treated as
/// further sitemaps: they are appended to <c>wa.Sitemaps</c> and the current
/// entry is removed. Otherwise the links are converted with
/// <c>Page.toPage</c> and stored on the entry's <c>Pages</c>.
/// Entries that yield no links are left unchanged.
/// </remarks>
/// <param name="wa">The web address whose <c>Sitemaps</c> collection is processed and mutated in place.</param>
/// <returns>The same <paramref name="wa"/> instance after processing.</returns>
public async Task<WebAddress> GetLinksAsync(WebAddress wa)
{
    var config = Configuration.Default.WithDefaultLoader();
    var context = BrowsingContext.New(config);

    // Index-based loop on purpose: the collection grows (nested sitemaps are
    // appended) and shrinks (expanded index entries are removed) while we walk it.
    for (int i = 0; i < wa.Sitemaps.Count; i++)
    {
        var sitemap = wa.Sitemaps.ElementAt(i);
        var document = await context.OpenAsync(wa.Url + sitemap.SitemapLink);

        var links = document.QuerySelectorAll(" * :not(:has(*))")
            .Where(v => v.TextContent.Contains(UrlFormater.GetLeftPartWOHttp(wa.Url)))
            .Select(v => v.TextContent)
            .ToList();

        if (!links.Any())
        {
            continue;
        }

        if (links.First().EndsWith(".xml"))
        {
            foreach (var item in links)
            {
                wa.Sitemaps.Add(new Sitemap() { SitemapLink = UrlFormater.GetRightPart(item) });
            }

            // Removing the current entry shifts the following one into slot i.
            // Step back so the loop's i++ lands on it instead of skipping it.
            if (wa.Sitemaps.Remove(sitemap))
            {
                i--;
            }
        }
        else
        {
            sitemap.Pages = Page.toPage(links);
        }
    }

    return wa;
}