/// <summary>
/// Program entry point: registers the seed URL as pending, builds the crawl
/// manager with its three writer dependencies, starts the crawl and then
/// blocks on console input.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Seed list handed to the crawl status tracker.
    var seedUrls = new List<string>();
    seedUrls.Add("https://www.youtube.com/watch?v=e8CLsYzE5wk");

    // The status manager is initialised before any pending URL is registered.
    CrawlStatusManager.Init();
    CrawlStatusManager.AddPendingWebsites(seedUrls);

    // Arguments are evaluated left to right, so the writers are still created
    // in the order: crawled websites, pending websites, suggestions.
    CrawlManager crawler = new CrawlManager(
        new ESWriteWebsitesManager(),
        new ESWritePendingWebsitesManager(),
        new ESWriteSuggestionsManager());

    // The call is not awaited here.
    // NOTE(review): presumably the crawl continues in the background - confirm
    // against StartCrawlingAsync's return type and implementation.
    crawler.StartCrawlingAsync();

    // Blocks until a line is read from standard input, keeping the process alive.
    Console.ReadLine();
}
/// <summary>
/// Processes <paramref name="urlList"/> as a growing work queue. Each URL that
/// <c>CrawlStatusManager.IsWebsiteRecentlyIndexed</c> does not report as recently
/// indexed is marked as visited, loaded, written to
/// <paramref name="outputManager"/>, and the related URLs extracted from the page
/// are appended to the end of <paramref name="urlList"/>.
/// </summary>
/// <param name="urlList">
/// Queue of URLs to process. The list is mutated: related URLs discovered while
/// crawling are appended to it, and the loop runs until the end is reached.
/// </param>
/// <param name="outputManager">Receives the information retrieved for each processed URL.</param>
/// <remarks>
/// Any exception raised while handling a single URL is caught and logged to the
/// console, and processing continues with the next URL.
/// </remarks>
public void ParseQueue(List<string> urlList, ESWriteWebsitesManager outputManager)
{
    // Index-based loop on purpose: the list grows while it is being walked,
    // which a foreach enumerator would reject.
    int i = 0;
    while (i < urlList.Count)
    {
        // After this line i is the 1-based position of the current URL.
        string url = urlList[i++];
        try
        {
            if (!CrawlStatusManager.IsWebsiteRecentlyIndexed(url))
            {
                // Marked before loading, so a URL whose load fails stays marked.
                // NOTE(review): presumably intended to avoid retrying broken pages - confirm.
                CrawlStatusManager.MarkAsVisited(url);

                var htmlDoc = Utils.LoadWebsite(url);
                var retrievedInfo = Utils.RetrieveWebsiteInfo(url, htmlDoc);
                outputManager.OutputEntry(retrievedInfo);

                var relatedWebsiteUrls = Utils.RetrieveRelatedWebsitesUrls(url, htmlDoc);
                Console.WriteLine(i);

                // Any() stops at the first element instead of counting the whole sequence.
                if (relatedWebsiteUrls != null && relatedWebsiteUrls.Any())
                {
                    urlList.AddRange(relatedWebsiteUrls);
                }
            }
            else
            {
                // Separator added so the position no longer fuses onto the URL text.
                Console.WriteLine("Website --> ALREADY VISITED -->" + url + " " + i);
            }
        }
        catch (Exception ex)
        {
            // Best-effort crawl: keep going, but report what actually failed
            // instead of discarding the exception.
            Console.WriteLine("Untreated error appeared. Skipping ---> " + url
                + " (" + ex.GetType().Name + ": " + ex.Message + ")");
        }
    }
}
/// <summary>
/// Creates a crawl manager and stores the three writer dependencies it is given.
/// </summary>
/// <param name="crawledWebsites">Writer stored in the <c>crawledWebsites</c> member.</param>
/// <param name="pendingWebsites">Writer stored in the <c>pendingWebsites</c> member.</param>
/// <param name="suggestions">Writer stored in the <c>suggestions</c> member.</param>
public CrawlManager(
    ESWriteWebsitesManager crawledWebsites,
    ESWritePendingWebsitesManager pendingWebsites,
    ESWriteSuggestionsManager suggestions)
{
    // "this." is required: each parameter shadows the member of the same name.
    this.crawledWebsites = crawledWebsites;
    this.pendingWebsites = pendingWebsites;
    this.suggestions = suggestions;
}