Esempio n. 1
0
        /// <summary>
        /// Console entry point: registers a single seed URL as pending, wires up a
        /// <c>CrawlManager</c> with its three output managers, and starts crawling.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var seedUrls = new List<string>
            {
                "https://www.youtube.com/watch?v=e8CLsYzE5wk"
            };

            CrawlStatusManager.Init();
            CrawlStatusManager.AddPendingWebsites(seedUrls);

            // Arguments are evaluated left to right, so the managers are still
            // constructed in the order: crawled, pending, suggestions.
            var crawler = new CrawlManager(
                new ESWriteWebsitesManager(),
                new ESWritePendingWebsitesManager(),
                new ESWriteSuggestionsManager());

            crawler.StartCrawlingAsync();

            // Block until the user presses Enter.
            // NOTE(review): presumably this keeps the process alive while the crawl
            // started above runs in the background - confirm against StartCrawlingAsync.
            Console.ReadLine();
        }
Esempio n. 2
0
        /// <summary>
        /// Processes <paramref name="urlList"/> as a growing work queue. Each URL that
        /// <c>CrawlStatusManager.IsWebsiteRecentlyIndexed</c> does not report as recently
        /// indexed is marked as visited, loaded, passed to <paramref name="outputManager"/>,
        /// and the related URLs found for it are appended to the end of the same list.
        /// </summary>
        /// <param name="urlList">
        /// Seed URLs. The list is mutated: related URLs discovered during the walk are appended to it.
        /// </param>
        /// <param name="outputManager">Receives the info retrieved for every processed URL.</param>
        /// <remarks>
        /// Any exception raised while handling one URL is reported and that URL is skipped;
        /// the walk then continues with the next entry.
        /// </remarks>
        public void ParseQueue(List <string> urlList, ESWriteWebsitesManager outputManager)
        {
            int i = 0;

            // Index-based loop on purpose: the list grows while it is being walked,
            // which a foreach enumerator would reject.
            while (i < urlList.Count)
            {
                string url = urlList[i++];
                try
                {
                    if (CrawlStatusManager.IsWebsiteRecentlyIndexed(url))
                    {
                        Console.WriteLine("Website --> ALREADY VISITED -->" + url + i);
                        continue;
                    }

                    // Marked before loading, so a URL whose load fails is still recorded as visited.
                    CrawlStatusManager.MarkAsVisited(url);

                    var htmlDoc = Utils.LoadWebsite(url);

                    var retrievedInfo = Utils.RetrieveWebsiteInfo(url, htmlDoc);

                    outputManager.OutputEntry(retrievedInfo);

                    var relatedWebsiteUrls = Utils.RetrieveRelatedWebsitesUrls(url, htmlDoc);

                    Console.WriteLine(i);

                    // AddRange is a no-op for an empty sequence, so only null needs guarding.
                    // This also avoids enumerating the sequence a second time just to count it.
                    if (relatedWebsiteUrls != null)
                    {
                        urlList.AddRange(relatedWebsiteUrls);
                    }
                }
                catch (Exception ex)
                {
                    // Best-effort crawl: keep going, but do not lose the failure details.
                    Console.WriteLine("Untreated error appeared. Skipping ---> " + url);
                    Console.Error.WriteLine(ex);
                }
            }
        }
Esempio n. 3
0
 /// <summary>
 /// Creates a crawl manager that holds on to the three supplied write managers.
 /// </summary>
 /// <param name="crawledWebsites">Manager stored in the <c>crawledWebsites</c> member.</param>
 /// <param name="pendingWebsites">Manager stored in the <c>pendingWebsites</c> member.</param>
 /// <param name="suggestions">Manager stored in the <c>suggestions</c> member.</param>
 public CrawlManager(ESWriteWebsitesManager crawledWebsites, ESWritePendingWebsitesManager pendingWebsites, ESWriteSuggestionsManager suggestions)
 {
     // Plain stores with no validation; the arguments are kept exactly as passed.
     this.suggestions     = suggestions;
     this.pendingWebsites = pendingWebsites;
     this.crawledWebsites = crawledWebsites;
 }