/// <summary>
/// Crawls the site level by level, starting from <c>BaseUrl</c>, collects every URL
/// whose content could be loaded, and saves the document to <c>SavePath</c>.
/// </summary>
/// <returns>A task that completes after the crawl has finished and the document has been saved.</returns>
public async Task GenerateSitemap()
{
    var crawled = new List<string>();

    // The document holds a reference to this same list, so URLs appended
    // during the crawl are visible through Document.Urls.
    Document.Urls = crawled;

    var pending = new List<string> { BaseUrl };

    // Each pass loads the current batch of URLs and gathers the links found on them;
    // the next batch is whatever was found that has not been recorded yet.
    while (pending.Count != 0)
    {
        var discovered = new List<string>();

        foreach (string pageUrl in pending)
        {
            string content = await _loader.Get(pageUrl);
            if (string.IsNullOrEmpty(content))
            {
                // Nothing came back for this URL: it is not recorded.
                continue;
            }

            crawled.Add(pageUrl);
            Notify?.Invoke();

            List<string> links = Parser.GetAHrefs(content).Distinct().ToList();

            // Normalize receives the list by reference and may replace it.
            // NOTE(review): presumably resolves links against Domain / the page URL - confirm in Parser.
            Parser.Normalize(Domain, pageUrl, ref links);

            if (Exclude)
            {
                // Keep only the part of each link that precedes the first '?'.
                links = links
                    .Select(link =>
                    {
                        if (link.Contains('?'))
                        {
                            return link.Split('?')[0];
                        }

                        return link;
                    })
                    .ToList();
            }

            discovered.AddRange(links);
        }

        // De-duplicate the links (first occurrence wins) and drop those already recorded.
        pending = discovered.Distinct().Except(crawled).ToList();
    }

    Document.Save(SavePath);
}
/// <summary>
/// Crawls the site level by level, starting from <c>BaseUrl</c>, collects every URL
/// whose content could be loaded, and saves the document to the given path.
/// A failure while processing a single URL is written to the console as a warning
/// and the crawl continues with the next URL.
/// </summary>
/// <param name="path">Value passed to the document's <c>Save</c> call once the crawl is finished.</param>
/// <returns>A task that completes after the crawl has finished and the document has been saved.</returns>
public async Task GenerateSiteMap(String path)
{
    var crawled = new List<string>();

    // The document holds a reference to this same list, so URLs appended
    // during the crawl are visible through _document.Urls.
    _document.Urls = crawled;

    var pending = new List<string> { BaseUrl };

    // Each pass loads the current batch of URLs and gathers the links found on them;
    // the next batch is whatever was found that has not been recorded yet.
    while (pending.Count != 0)
    {
        var discovered = new List<string>();

        foreach (string pageUrl in pending)
        {
            try
            {
                string content = await _loader.Get(pageUrl);
                if (string.IsNullOrEmpty(content))
                {
                    // Nothing came back for this URL: it is not recorded.
                    continue;
                }

                crawled.Add(pageUrl);
                Notify?.Invoke();

                List<string> rawLinks = Parser.GetAHrefs(content).Distinct().ToList();

                // NOTE(review): presumably resolves links against Domain / the page URL - confirm in Parser.
                List<string> links = Parser.Normalize(Domain, pageUrl, rawLinks);

                if (Exclude)
                {
                    // Keep only the part of each link that precedes the first '?'.
                    links = links
                        .Select(link =>
                        {
                            if (link.Contains('?'))
                            {
                                return link.Split('?')[0];
                            }

                            return link;
                        })
                        .ToList();
                }

                discovered.AddRange(links);
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and move on. A URL that was already
                // appended to the recorded list before the failure stays there.
                Console.WriteLine($"[ cms][ warning]: resolve site map url failed! url={pageUrl} message={ex.Message}");
            }
        }

        // De-duplicate the links (first occurrence wins) and drop those already recorded.
        pending = discovered.Distinct().Except(crawled).ToList();
    }

    _document.Save(path);
}