/// <summary>
/// Gives the crawler a starting point without user input: requests
/// https://www.youtube.com/ through <c>HTTPFactory.YoutubeCrawlRequest</c>,
/// runs the response through <c>CrawlerRegex.regexContent</c> and appends the
/// result to <c>listResponses</c> while holding <c>locker</c>.
/// </summary>
/// <returns>
/// <c>true</c> once the content has been appended; <c>false</c> when a
/// <see cref="NullReferenceException"/> was caught and logged. Any other
/// exception type is not caught here.
/// </returns>
public async Task<bool> crawlerStartingPoint()
{
    const string startUrl = "https://www.youtube.com/";

    try
    {
        Debug.WriteLine("crawlerStartingPoint gets results");

        // The request object is stored on the instance field, not in a local.
        httpClientRequest = new HTTPFactory();
        string rawPage = await httpClientRequest.YoutubeCrawlRequest(startUrl);

        // Reduce the HTTP response to its content part.
        string pageContent = CrawlerRegex.regexContent(rawPage);

        lock (this.locker)
        {
            listResponses.Add(pageContent);
        }

        return true;
    }
    catch (NullReferenceException e)
    {
        Debug.WriteLine("crawlerStartingPoint() geeft NullReferenceException: " + e.Message);
        return false;
    }
}
/// <summary>
/// Crawls a starting point without user input: requests
/// https://www.youtube.com/ through
/// <c>MaakHttpClientAan.doeHttpRequestYoutubeVoorScrawlerEnGeefResults</c>,
/// runs the response through <c>CrawlerRegex.regexContent</c> and appends the
/// result to <c>lijstResponses</c> while holding <c>locker</c>.
/// </summary>
/// <returns>
/// <c>true</c> once the content has been appended; <c>false</c> when a
/// <see cref="NullReferenceException"/> was caught and logged. Any other
/// exception type is not caught here.
/// </returns>
public async Task<bool> crawlBeginpunt()
{
    const string startUrl = "https://www.youtube.com/";

    try
    {
        Debug.WriteLine("crawlBeginpunt gets results");

        // The request object is stored on the instance field, not in a local.
        httpClientRequest = new MaakHttpClientAan();
        string rawPage = await httpClientRequest.doeHttpRequestYoutubeVoorScrawlerEnGeefResults(startUrl);

        // Extract the body from the response.
        string pageContent = CrawlerRegex.regexContent(rawPage);

        lock (this.locker)
        {
            lijstResponses.Add(pageContent);
        }

        return true;
    }
    catch (NullReferenceException e)
    {
        Debug.WriteLine("crawlBeginpunt() geeft NullReferenceException: " + e.Message);
        return false;
    }
}
/// <summary>
/// Searches on user input and starts a background crawl of the URLs found in
/// the search results.
/// </summary>
/// <param name="zoekterm">
/// Search term handed to
/// <c>MaakHttpClientAan.doeHttpRequestYoutubeMetZoektermEnGeefResults</c>.
/// </param>
/// <returns>
/// The search response after <c>CrawlerRegex.regexResults</c> has been applied.
/// The background crawl is not awaited, so it may still be running when this
/// task completes.
/// </returns>
public static async Task<string> crawlZoekterm(string zoekterm)
{
    MaakHttpClientAan searchRequest = new MaakHttpClientAan();
    string httpResponseBody = await searchRequest.doeHttpRequestYoutubeMetZoektermEnGeefResults(zoekterm);

    // Extract the results from the response.
    httpResponseBody = CrawlerRegex.regexResults(httpResponseBody);

    // Snapshot for the closure so the background work never observes a later
    // change to the local that is returned below.
    string results = httpResponseBody;

    // Deliberately not awaited: the caller gets the search results right away
    // while the crawl continues on the thread pool. Task.Run (unlike
    // Task.Factory.StartNew) understands async lambdas, and the try/catch makes
    // sure a failing crawl is logged instead of vanishing on an unobserved task.
    Task backgroundCrawl = Task.Run(async () =>
    {
        try
        {
            // Extract the URLs from the results.
            List<string> urls = CrawlerRegex.regexUrls(results);

            foreach (string url in urls)
            {
                MaakHttpClientAan crawlRequest = new MaakHttpClientAan();

                // One-second pause before each request
                // (presumably to throttle the crawl - TODO confirm).
                await Task.Delay(1000);

                string antwoord = await crawlRequest.doeHttpRequestYoutubeVoorScrawlerEnGeefResults(url);

                // Extract the content from the response, then the keywords from the content.
                string body = CrawlerRegex.regexContent(antwoord);
                CrawlerRegex.regexKeywords(body);
            }
        }
        catch (Exception e)
        {
            System.Diagnostics.Debug.WriteLine("crawlZoekterm() background crawl failed: " + e);
        }
    });

    return httpResponseBody;
}
/// <summary>
/// Searches with user input and starts a background crawl of the URLs found
/// in the search results.
/// </summary>
/// <param name="zoekterm">
/// Search term handed to <c>HTTPFactory.YoutubeSearchResults</c>.
/// </param>
/// <returns>
/// The search response after <c>CrawlerRegex.regexResults</c> has been applied.
/// The background crawl is not awaited, so it may still be running when this
/// task completes.
/// </returns>
public static async Task<string> crawlerSearchterm(string zoekterm)
{
    HTTPFactory searchRequest = new HTTPFactory();
    string httpResponseBody = await searchRequest.YoutubeSearchResults(zoekterm);

    // Get the results from the response.
    httpResponseBody = CrawlerRegex.regexResults(httpResponseBody);

    // Snapshot for the closure so the background work never observes a later
    // change to the local that is returned below.
    string results = httpResponseBody;

    // Deliberately not awaited: the caller gets the search results right away
    // while the crawl continues on the thread pool. Task.Run (unlike
    // Task.Factory.StartNew) understands async lambdas, and the try/catch makes
    // sure a failing crawl is logged instead of vanishing on an unobserved task.
    Task backgroundCrawl = Task.Run(async () =>
    {
        try
        {
            // Extract the URLs from the results.
            List<string> urls = CrawlerRegex.regexUrls(results);

            foreach (string url in urls)
            {
                HTTPFactory crawlRequest = new HTTPFactory();

                // One-second pause before each request
                // (presumably to throttle the crawl - TODO confirm).
                await Task.Delay(1000);

                string response = await crawlRequest.YoutubeCrawlRequest(url);

                // Extract the content from the response, then the keywords from the content.
                string body = CrawlerRegex.regexContent(response);
                CrawlerRegex.regexKeywords(body);
            }
        }
        catch (Exception e)
        {
            System.Diagnostics.Debug.WriteLine("crawlerSearchterm() background crawl failed: " + e);
        }
    });

    return httpResponseBody;
}