/// <summary>
/// Gives the crawler a starting point without user input: requests the
/// YouTube front page, reduces the response with
/// <c>CrawlerRegex.regexContent</c> and appends the result to
/// <c>listResponses</c> while holding <c>locker</c>.
/// </summary>
/// <remarks>
/// Overwrites the <c>httpClientRequest</c> field with a fresh
/// <c>HTTPFactory</c>. Only <see cref="NullReferenceException"/> is handled
/// here; any other exception propagates to the caller.
/// </remarks>
/// <returns>
/// <c>true</c> once the content has been added to <c>listResponses</c>;
/// <c>false</c> when a <see cref="NullReferenceException"/> was caught.
/// </returns>
public async Task<bool> crawlerStartingPoint()
{
    const string startUrl = "https://www.youtube.com/";

    try
    {
        Debug.WriteLine("crawlerStartingPoint gets results");

        httpClientRequest = new HTTPFactory();
        string rawPage = await httpClientRequest.YoutubeCrawlRequest(startUrl);

        // Reduce the HTTP response to the part the crawler stores.
        string content = CrawlerRegex.regexContent(rawPage);

        lock (this.locker)
        {
            listResponses.Add(content);
        }

        return true;
    }
    catch (NullReferenceException e)
    {
        Debug.WriteLine("crawlerStartingPoint() geeft NullReferenceException: " + e.Message);
        return false;
    }
}
/// <summary>
/// Requests <paramref name="url"/> through a new <c>HTTPFactory</c> and
/// returns whatever <c>YoutubeCrawlRequest</c> produces.
/// </summary>
/// <remarks>
/// Overwrites the <c>httpClientRequest</c> field with the new factory.
/// </remarks>
/// <param name="url">The URL to fetch.</param>
/// <returns>
/// The result of the request, or an empty string when the request throws
/// (the stack trace is written to the debug output in that case).
/// </returns>
public async Task<string> getResponseBody(string url)
{
    httpClientRequest = new HTTPFactory();

    // Trace which URL is being fetched.
    Debug.WriteLine("url in getResponseBody() = " + url);

    try
    {
        // await = wait until the response has arrived.
        return await httpClientRequest.YoutubeCrawlRequest(url);
    }
    catch (Exception ex)
    {
        // The request failed: log it and fall back to an empty body.
        Debug.WriteLine("httpError: " + ex.StackTrace);
        return "";
    }
}
/// <summary>
/// Searches with user input: requests the search results for
/// <paramref name="zoekterm"/>, reduces the response with
/// <c>CrawlerRegex.regexResults</c>, starts a background crawl of the URLs
/// found in those results, and returns the reduced results.
/// </summary>
/// <remarks>
/// The crawl is started but not awaited. The previous implementation awaited
/// <c>Task.Factory.StartNew</c> with an async lambda, which only waits for
/// the outer <c>Task&lt;Task&gt;</c> (i.e. until the lambda reaches its first
/// inner await), so callers have always received the results before the
/// crawl finished and crawl exceptions were silently dropped. The
/// background behaviour is kept so callers see the same latency, but it is
/// now explicit and failures are logged instead of lost.
/// </remarks>
/// <param name="zoekterm">The search term entered by the user.</param>
/// <returns>The search response after <c>CrawlerRegex.regexResults</c>.</returns>
public static async Task<string> crawlerSearchterm(string zoekterm)
{
    HTTPFactory httpClientRequest = new HTTPFactory();
    string httpResponseBody = await httpClientRequest.YoutubeSearchResults(zoekterm);

    // Get results from response
    httpResponseBody = CrawlerRegex.regexResults(httpResponseBody);

    // Task.Run unwraps the async delegate, so this Task represents the whole
    // crawl. It is deliberately left running: crawlSearchResultUrls handles
    // and logs its own failures, so the task never faults unobserved.
    Task backgroundCrawl = Task.Run(() => crawlSearchResultUrls(httpResponseBody));

    return httpResponseBody;
}

// Crawls every URL found in the given search results, one request per
// second, and runs keyword extraction on each page. A failure is logged and
// the crawl continues with the next URL.
private static async Task crawlSearchResultUrls(string searchResults)
{
    // Pause before every request so the crawl is throttled.
    const int crawlDelayMs = 1000;

    List<string> urls;
    try
    {
        // Extract urls from response
        urls = CrawlerRegex.regexUrls(searchResults);
    }
    catch (Exception ex)
    {
        Debug.WriteLine("crawlerSearchterm() could not extract URLs: " + ex.ToString());
        return;
    }

    if (urls == null)
    {
        return;
    }

    foreach (string url in urls)
    {
        try
        {
            HTTPFactory crawlRequest = new HTTPFactory();
            await Task.Delay(crawlDelayMs);

            // Crawl on URL
            string response = await crawlRequest.YoutubeCrawlRequest(url);

            // Extract content from the response, then get keywords from it.
            string body = CrawlerRegex.regexContent(response);
            CrawlerRegex.regexKeywords(body);
        }
        catch (Exception ex)
        {
            // Best effort: one failing URL must not stop the remaining ones.
            Debug.WriteLine("crawlerSearchterm() crawl failed for " + url + ": " + ex.ToString());
        }
    }
}