Example #1
0
        /// <summary>
        /// Crawls the site level by level starting at <c>BaseUrl</c>, records every URL that
        /// returned content in <c>Document.Urls</c>, and finally saves the document to
        /// <c>SavePath</c>.
        /// </summary>
        /// <remarks>
        /// Each round downloads the URLs discovered by the previous round. A URL whose download
        /// yields null or empty text is skipped and not recorded. <c>Notify</c> is raised once
        /// for every recorded URL. When <c>Exclude</c> is set, everything from the first
        /// <c>?</c> onwards is removed from discovered links. Every URL is requested at most
        /// once, whether or not its download produced content. Exceptions thrown by the loader
        /// or the parser are not caught here and abort the crawl before anything is saved.
        /// </remarks>
        /// <returns>A task that completes once the document has been saved.</returns>
        public async Task GenerateSitemap()
        {
            // The list is handed to the document up front and appended to while crawling.
            List<string> visited = new List<string>();
            Document.Urls = visited;

            // Every URL that has ever been queued for download. Filtering against this set
            // (instead of against the successfully visited URLs) stops a URL that returned
            // nothing from being downloaded again each time another page links to it.
            HashSet<string> scheduled = new HashSet<string> { BaseUrl };
            List<string> pending = new List<string> { BaseUrl };

            do
            {
                List<string> discovered = new List<string>();
                foreach (string url in pending)
                {
                    string text = await _loader.Get(url);
                    if (string.IsNullOrEmpty(text))
                    {
                        continue;
                    }

                    visited.Add(url);
                    Notify?.Invoke();

                    List<string> meta = Parser.GetAHrefs(text).Distinct().ToList();
                    // NOTE(review): Normalize presumably resolves links against the page URL
                    // and domain; its exact rules are not visible from this method.
                    Parser.Normalize(Domain, url, ref meta);
                    if (Exclude)
                    {
                        // Split('?')[0] is the whole string when there is no query part.
                        meta = meta.Select(u => u.Split('?')[0]).ToList();
                    }

                    // Duplicates are tolerated here; they are dropped once per round below
                    // rather than re-deduplicating the whole list after every page.
                    discovered.AddRange(meta);
                }

                // HashSet.Add returns false for anything already scheduled, so this keeps the
                // first occurrence of each genuinely new URL, in discovery order.
                pending = discovered.Where(href => scheduled.Add(href)).ToList();
            } while (pending.Count != 0);

            Document.Save(SavePath);
        }
Example #2
0
        /// <summary>
        /// Crawls the site level by level starting at <c>BaseUrl</c>, records every URL that
        /// returned content in the document's <c>Urls</c> list, and finally saves the document
        /// to <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// Each round downloads the URLs discovered by the previous round. A URL whose download
        /// yields null or empty text is skipped and not recorded. <c>Notify</c> is raised once
        /// for every recorded URL. When <c>Exclude</c> is set, everything from the first
        /// <c>?</c> onwards is removed from discovered links. Any exception raised while
        /// processing a single URL is written to the console as a warning and the crawl
        /// continues with the next URL. Every URL is requested at most once, so a failing URL
        /// produces at most one warning.
        /// </remarks>
        /// <param name="path">Destination passed unchanged to the document's <c>Save</c> method.</param>
        /// <returns>A task that completes once the document has been saved.</returns>
        public async Task GenerateSiteMap(String path)
        {
            // The list is handed to the document up front and appended to while crawling.
            List<string> visited = new List<string>();
            _document.Urls = visited;

            // Every URL that has ever been queued for download. Filtering against this set
            // (instead of against the successfully visited URLs) stops a URL that failed or
            // returned nothing from being downloaded, and logged, again on later rounds.
            HashSet<string> scheduled = new HashSet<string> { BaseUrl };
            List<string> pending = new List<string> { BaseUrl };

            do
            {
                List<string> discovered = new List<string>();
                foreach (string url in pending)
                {
                    try
                    {
                        string text = await _loader.Get(url);
                        if (string.IsNullOrEmpty(text))
                        {
                            continue;
                        }

                        visited.Add(url);
                        Notify?.Invoke();

                        List<string> meta = Parser.GetAHrefs(text).Distinct().ToList();
                        // NOTE(review): Normalize presumably resolves links against the page
                        // URL and domain; its exact rules are not visible from this method.
                        meta = Parser.Normalize(Domain, url, meta);
                        if (Exclude)
                        {
                            // Split('?')[0] is the whole string when there is no query part.
                            meta = meta.Select(u => u.Split('?')[0]).ToList();
                        }

                        // Duplicates are tolerated here; they are dropped once per round below
                        // rather than re-deduplicating the whole list after every page.
                        discovered.AddRange(meta);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one bad page must not abort the whole crawl.
                        Console.WriteLine($"[ cms][ warning]: resolve site map url failed! url={url} message={ex.Message}");
                    }
                }

                // HashSet.Add returns false for anything already scheduled, so this keeps the
                // first occurrence of each genuinely new URL, in discovery order.
                pending = discovered.Where(href => scheduled.Add(href)).ToList();
            } while (pending.Count != 0);

            _document.Save(path);
        }