/// <summary>
/// Clears the database before the crawler starts: opens a new <c>dbConn</c>,
/// calls <c>dbTruncate()</c> on it and then closes the connection.
/// </summary>
/// <remarks>
/// The connection is closed in a <c>finally</c> block so it is released even
/// when the truncate call throws; the exception itself still propagates to
/// the caller.
/// </remarks>
public void emptyDatabase()
{
    dbConn dbConnection = new dbConn();
    try
    {
        dbConnection.dbTruncate();
    }
    finally
    {
        dbConnection.dbClose();
    }
}
/// <summary>
/// Extracts video metadata (video id, title, description, genre, thumbnail)
/// from the <c>itemprop</c> meta/link tags found in <paramref name="response"/>
/// and inserts it as one row into the <c>video</c> table.
/// </summary>
/// <param name="response">Page content to scan for <c>itemprop</c> tags.</param>
/// <remarks>
/// A row is inserted even when no tag matches; in that case the URL is just the
/// bare watch-URL prefix and the other columns are empty strings. When a tag
/// occurs more than once, the last occurrence wins for every field.
/// A <see cref="NullReferenceException"/> is logged and swallowed; any other
/// exception propagates after the connection has been closed.
/// </remarks>
public static void regexKeywords(string response)
{
    const string urlPrefix = "https://www.youtube.com/watch?v=";

    // Outer pattern: isolates each itemprop element so the field-specific
    // patterns below only ever look at a single tag.
    const string elementPattern = "(<meta itemprop=\"|<link itemprop=\"thumbnailUrl\")(.*?)\">";
    const string videoIdPattern = "(<meta itemprop=\"videoId\" content=\")(.*?)\">";
    const string titlePattern = "(<meta itemprop=\"name\" content=\")(.*?)\">";
    const string descriptionPattern = "(<meta itemprop=\"description\" content=\")(.*?)\">";
    const string genrePattern = "(<meta itemprop=\"genre\" content=\")(.*?)\">";
    const string thumbnailPattern = "(<link itemprop=\"thumbnailUrl\" href=\")(.*?)\">";

    dbConn dbConnection = new dbConn();
    try
    {
        string foundUrl = urlPrefix;
        string foundTitle = "";
        string foundDescription = "";
        string foundGenre = "";
        string foundThumbnail = "";

        foreach (Match element in Regex.Matches(response, elementPattern))
        {
            string tag = element.Value;

            // Rebuild the URL from the prefix instead of appending, so a
            // second videoId tag cannot produce a concatenated, invalid URL.
            string videoId = LastCapturedValue(tag, videoIdPattern);
            if (videoId != null)
            {
                foundUrl = urlPrefix + videoId;
            }

            foundTitle = LastCapturedValue(tag, titlePattern) ?? foundTitle;
            foundDescription = LastCapturedValue(tag, descriptionPattern) ?? foundDescription;
            foundGenre = LastCapturedValue(tag, genrePattern) ?? foundGenre;
            foundThumbnail = LastCapturedValue(tag, thumbnailPattern) ?? foundThumbnail;
        }

        // The values come from scraped (untrusted) page content. insertQuery
        // only accepts a finished SQL string, so the literals are escaped here.
        // NOTE(review): a parameterized query would be the robust fix if dbConn
        // can offer one; quote doubling assumes standard SQL string literals
        // (no backslash escapes) - confirm against the database in use.
        dbConnection.insertQuery(
            "INSERT INTO video(Url, Title, Description, Genre, Thumbnail) VALUES('"
            + EscapeSqlLiteral(foundUrl) + "', '"
            + EscapeSqlLiteral(foundTitle) + "', '"
            + EscapeSqlLiteral(foundDescription) + "', '"
            + EscapeSqlLiteral(foundGenre) + "', '"
            + EscapeSqlLiteral(foundThumbnail) + "')");
    }
    catch (NullReferenceException e)
    {
        Debug.WriteLine("getKeywords() geeft NullReferenceException: " + e.Message);
    }
    finally
    {
        // Always release the connection, including when an exception other
        // than NullReferenceException escapes the try block.
        dbConnection.dbClose();
    }
}

// Returns capture group 2 of the last match of pattern in input, or null when there is no match.
private static string LastCapturedValue(string input, string pattern)
{
    string captured = null;
    foreach (Match match in Regex.Matches(input, pattern))
    {
        captured = match.Groups[2].Value;
    }
    return captured;
}

// Doubles single quotes so the value can be embedded in a single-quoted SQL string literal.
private static string EscapeSqlLiteral(string value)
{
    return value.Replace("'", "''");
}