Ejemplo n.º 1
0
        /// <summary>
        /// Retrieves the sitemap at <paramref name="sitemapUrl"/> and collects the absolute URI
        /// of every entry it lists, without duplicates.
        /// </summary>
        /// <param name="sitemapUrl">Address of the sitemap to retrieve.</param>
        /// <returns>
        /// The distinct absolute URIs listed by the sitemap. The set is empty when no sitemap
        /// (or no entry list) was obtained.
        /// </returns>
        public static HashSet <string> ReadFromLoc(string sitemapUrl)
        {
            // A HashSet silently ignores duplicates, so each URL is stored once.
            HashSet <string> urls = new HashSet <string>();

            var siteMapQuery = new SitemapQuery();
            var sitemap      = siteMapQuery.RetrieveSitemap(sitemapUrl);

            // NOTE(review): RetrieveSitemap presumably yields null when the sitemap cannot be
            // downloaded or parsed - confirm against the library version in use. Guarding here
            // turns that case into an empty result instead of a NullReferenceException.
            if (sitemap == null || sitemap.Urls == null)
            {
                return(urls);
            }

            foreach (var url in sitemap.Urls)
            {
                // Skip entries that carry no location rather than dereferencing null.
                if (url == null || url.Location == null)
                {
                    continue;
                }

                urls.Add(url.Location.AbsoluteUri);
            }

            return(urls);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Scratch entry point: retrieves one product sitemap, then demonstrates that a
        /// <see cref="HashSet{T}"/> stores a repeated value only once by printing its count,
        /// and finally waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string productSitemapAddress = "https://www.taniaksiazka.pl/images/Sitemap/Wszystkieprodukty.xml.gz";

            SitemapQuery query = new SitemapQuery();

            // The retrieved sitemap is not consumed below; the call is kept because the
            // experiments that used it are currently disabled.
            query.RetrieveSitemap(productSitemapAddress);

            // Disabled experiments that previously filled this list with url.Location.AbsolutePath
            // values and printed the collected count per shop:
            //   * "Tania ksiazka"  - every nested sitemap of the sitemap retrieved above
            //   * "Nieprzeczytane" - nested sitemaps of https://www.nieprzeczytane.pl/sitemaps.xml
            //   * "Czytam"         - https://czytam.pl/sitemap/mapa{0}.xml for indexes 1 through 15
            // A further disabled experiment downloaded an mp3 from static.prsa.pl with
            // System.Net.WebClient using a browser-like user-agent header.
            var collectedPaths = new List <string>();

            // Seeded from the (currently empty) list, so the set starts out empty as well.
            var distinctPaths = new HashSet <string>(collectedPaths);

            // Adding the same value twice leaves a single element in the set.
            distinctPaths.Add("a");
            distinctPaths.Add("a");

            Console.WriteLine(distinctPaths.Count);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Collects the distinct entry locations from every sitemap that
        /// <c>GetAllSitemapsForDomainAsync</c> returns for <paramref name="domain"/>.
        /// </summary>
        /// <param name="domain">Domain passed to the sitemap query.</param>
        /// <returns>
        /// The distinct, non-null locations in first-seen order. The list is empty when the
        /// query throws one of the handled exceptions or yields nothing.
        /// </returns>
        /// <remarks>
        /// <see cref="NullReferenceException"/>, <see cref="InvalidOperationException"/> and
        /// <see cref="HttpRequestException"/> raised by the query are logged and treated as
        /// "no sitemaps"; any other exception propagates to the caller.
        /// </remarks>
        public async Task <List <Uri> > GetUrlsAsync(string domain)
        {
            var sitemapQuery = new SitemapQuery();

            IEnumerable <SitemapFile> sitemaps = null;

            try
            {
                sitemaps = await sitemapQuery
                           .GetAllSitemapsForDomainAsync(domain);
            }
            catch (NullReferenceException ex)
            {
                // Pass the exception itself so the stack trace reaches the log; the rendered
                // message text is still exactly ex.Message.
                _logger.LogError(ex, "{Message}", ex.Message);
            }
            catch (InvalidOperationException)
            {
                _logger.LogInformation("Invalid sitemap");
            }
            catch (HttpRequestException)
            {
                _logger.LogInformation("Invalid host");
            }

            // A handled failure (or a null result) is treated as "no sitemaps".
            if (sitemaps == null)
            {
                sitemaps = Enumerable.Empty <SitemapFile>();
            }

            // This projection runs outside the try block, so null sitemaps, null entry lists
            // and entries without a location are filtered out instead of throwing here or
            // leaking null URIs to the caller.
            return(sitemaps
                   .Where(s => s != null && s.Urls != null)
                   .SelectMany(s => s.Urls)
                   .Where(u => u != null && u.Location != null)
                   .Select(u => u.Location)
                   .Distinct()
                   .ToList());
        }