/// <summary>
/// Attaches sentiment information to a driver review and appends the review
/// to the review list of the matching ApplicationUser.
/// </summary>
/// <param name="Id">Id of the user whose <c>Reviews</c> collection receives the review.</param>
/// <param name="review">Review to store; its <c>Message</c> is passed to <c>Get</c> to obtain sentiment results.</param>
/// <param name="sp">Service provider used to resolve the <c>ApplicationDbContext</c>.</param>
/// <remarks>
/// NOTE(review): this method is <c>async void</c>, so callers cannot await it and any
/// exception thrown here cannot be caught by the caller. The signature is kept as-is for
/// compatibility with existing call sites; consider returning <c>Task</c> instead.
/// </remarks>
public async void AddDriverReview(string Id, DriverReview review, IServiceProvider sp)
{
    if (review == null)
    {
        throw new ArgumentNullException(nameof(review));
    }

    if (sp == null)
    {
        throw new ArgumentNullException(nameof(sp));
    }

    var db = sp.GetService<ApplicationDbContext>();
    if (db == null)
    {
        // GetService returns null for unregistered services; fail with a clear message
        // instead of a NullReferenceException further down.
        throw new InvalidOperationException("ApplicationDbContext is not registered with the service provider.");
    }

    var result = await Get(review.Message);

    // Flatten each sentiment result into a SentimentInfo attached to the review.
    // NOTE(review): the sentiment type is stored in SentimentType here, while
    // AddCarReview stores the same value in EntityType - confirm which is intended.
    review.SentimentEntities = new List<SentimentInfo>();
    foreach (var r in result)
    {
        review.SentimentEntities.Add(new SentimentInfo()
        {
            SentimentScore = r.sentiment.score,
            SentimentType = r.sentiment.type
        });
    }

    review.TimeCreated = DateTime.Now;

    var user = await _dbUsersWithReviews(db, Id);
    if (user == null)
    {
        // FirstOrDefault yields null when no user has this id.
        throw new InvalidOperationException($"No user with id '{Id}' was found; the review was not saved.");
    }

    user.Reviews.Add(review);
    await db.SaveChangesAsync();

    // Loads the user with the given id together with its Reviews collection.
    Task<ApplicationUser> _dbUsersWithReviews(ApplicationDbContext context, string userId)
    {
        return context.Users
            .Where(u => u.Id == userId)
            .Include(u => u.Reviews)
            .FirstOrDefaultAsync();
    }
}
/// <summary>
/// Attaches sentiment information to a car review and appends the review
/// to the review list of the matching car.
/// </summary>
/// <param name="Id">Id of the car whose <c>Reviews</c> collection receives the review.</param>
/// <param name="review">Review to store; its <c>Message</c> is passed to <c>Get</c> to obtain sentiment results.</param>
/// <param name="sp">Service provider used to resolve the <c>ApplicationDbContext</c>.</param>
/// <remarks>
/// NOTE(review): this method is <c>async void</c>, so callers cannot await it and any
/// exception thrown here cannot be caught by the caller. The signature is kept as-is for
/// compatibility with existing call sites; consider returning <c>Task</c> instead.
/// </remarks>
public async void AddCarReview(int Id, CarReview review, IServiceProvider sp)
{
    if (review == null)
    {
        throw new ArgumentNullException(nameof(review));
    }

    if (sp == null)
    {
        throw new ArgumentNullException(nameof(sp));
    }

    var db = sp.GetService<ApplicationDbContext>();
    if (db == null)
    {
        // GetService returns null for unregistered services; fail with a clear message
        // instead of a NullReferenceException further down.
        throw new InvalidOperationException("ApplicationDbContext is not registered with the service provider.");
    }

    var result = await Get(review.Message);

    // Flatten each sentiment result into a SentimentInfo attached to the review.
    // NOTE(review): the sentiment type is stored in EntityType here, while
    // AddDriverReview stores the same value in SentimentType - confirm which is intended.
    review.SentimentEntities = new List<SentimentInfo>();
    foreach (var r in result)
    {
        review.SentimentEntities.Add(new SentimentInfo()
        {
            SentimentScore = r.sentiment.score,
            EntityType = r.sentiment.type
        });
    }

    review.TimeCreated = DateTime.Now;

    var car = await db.Cars
        .Where(c => c.Id == Id)
        .Include(c => c.Reviews)
        .FirstOrDefaultAsync();
    if (car == null)
    {
        // FirstOrDefault yields null when no car has this id.
        throw new InvalidOperationException($"No car with id {Id} was found; the review was not saved.");
    }

    car.Reviews.Add(review);
    await db.SaveChangesAsync();
}
/// <summary>
/// Scrubs <paramref name="text"/>, tokenizes it, and scores the tokens against a
/// word-to-valence dictionary.
/// </summary>
/// <param name="text">Raw text to analyze.</param>
/// <param name="valences">Lookup from lower-case token to its integer valence.</param>
/// <returns>
/// A <c>SentimentInfo</c> built from the summed valence and the number of tokens found in
/// <paramref name="valences"/>, carrying per-token occurrence counts for positive tokens,
/// negative tokens, and the proper nouns reported by <c>GetProperNouns</c>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="text"/> or <paramref name="valences"/> is null.
/// </exception>
private static SentimentInfo ProcessText(string text, Dictionary<string, int> valences)
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    if (valences == null)
    {
        throw new ArgumentNullException(nameof(valences));
    }

    char[] space = { ' ' };

    // Remove <a> anchor tags.
    // NOTE(review): "[*]" is a character class containing only '*', so this pattern matches
    // just the literal text "<a*a>" and will not strip real anchor markup. The pattern is
    // left unchanged because the intended behavior (drop the tags only vs. the whole
    // element) cannot be determined from this method - confirm and fix.
    text = Regex.Replace(text, "<a[*]a>", "");

    // Replace non-alphanumeric characters (except periods) with spaces, then collapse
    // whitespace runs. Periods are kept for the proper-noun pass below.
    text = Regex.Replace(text, "[^a-zA-Z0-9. ]", " ");
    text = Regex.Replace(text, @"\s+", " ");

    // Tokens that still carry periods are handed to the proper-noun detection.
    List<string> tokens = text.Split(space, StringSplitOptions.RemoveEmptyEntries).ToList();
    Dictionary<string, int> properNounTokens = GetProperNouns(tokens);

    // Fully strip all remaining non-alphanumeric characters before valence lookup.
    text = Regex.Replace(text, "[^a-zA-Z0-9 ]", " ");
    text = Regex.Replace(text, @"\s+", " ");
    tokens = text.Split(space, StringSplitOptions.RemoveEmptyEntries).ToList();

    int valenceTotal = 0;
    int numTokens = 0;
    var positiveTokens = new Dictionary<string, int>();
    var negativeTokens = new Dictionary<string, int>();

    foreach (string rawToken in tokens)
    {
        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
        // under Turkish cultures, which would make dictionary lookups miss.
        string token = rawToken.ToLowerInvariant();

        if (!valences.TryGetValue(token, out int valence))
        {
            continue;
        }

        if (valence > 0)
        {
            CountOccurrence(positiveTokens, token);
        }
        else if (valence < 0)
        {
            CountOccurrence(negativeTokens, token);
        }
        // A valence of exactly 0 is neither positive nor negative; the token still counts
        // as identified below but is not added to either list.

        valenceTotal += valence;
        numTokens++;
    }

    SentimentInfo info = new SentimentInfo(valenceTotal, numTokens)
    {
        PositiveTokens = positiveTokens,
        NegativeTokens = negativeTokens,
        ProperNounTokens = properNounTokens
    };
    return info;

    // Increments the occurrence count for key, starting at 1 for a new key.
    void CountOccurrence(Dictionary<string, int> counts, string key)
    {
        counts.TryGetValue(key, out int seen);
        counts[key] = seen + 1;
    }
}
/// <summary>
/// Downloads one article, extracts the text of the nodes configured for its domain,
/// runs sentiment processing on it, records the result in the <c>articles</c> table and
/// appends a summary line to <paramref name="outFileName"/>.
/// </summary>
/// <param name="linkURL">URL of the article; also stored as the article's <c>name</c>.</param>
/// <param name="client">HTTP client used for the GET request.</param>
/// <param name="outFileName">File the per-article summary line is appended to.</param>
/// <returns>
/// The sentiment result, or <c>null</c> when no known domain matches the URL or the page
/// contains no nodes matching the domain's element type and class.
/// </returns>
public static async Task<SentimentInfo> MakeRequest(string linkURL, HttpClient client, string outFileName)
{
    // See if we know how to extract content from the domain of the current article.
    DomainSettings domain = NoiseConfigurations.KnownDomains.FirstOrDefault(d => linkURL.Contains(d.Domain));
    if (domain == null)
    {
        return null;
    }

    // Fetch the article; the response is disposed as soon as its body has been read.
    string articleResponse;
    using (HttpResponseMessage article = await client.GetAsync(linkURL))
    {
        articleResponse = await article.Content.ReadAsStringAsync();
    }

    var articleHTML = new HtmlDocument();
    articleHTML.LoadHtml(articleResponse);

    // Select nodes that conform to the domain's relevant element type and class.
    HtmlNodeCollection newsSnippets = articleHTML.DocumentNode.SelectNodes(
        $"//{domain.RelevantElementType}[contains(@class, '{domain.RelevantClassName}')]");
    if (newsSnippets == null || newsSnippets.Count == 0)
    {
        return null;
    }

    // Join the inner text of all relevant nodes, with a space after each node.
    string nodeConcat = string.Concat(newsSnippets.Select(n => n.InnerText + " "));

    SentimentInfo info = ProcessText(nodeConcat, NoiseConfigurations.Valences);

    using (NpgsqlConnection connection = new NpgsqlConnection(NoiseConfigurations.PostgresConnectionString))
    {
        await connection.OpenAsync();

        using (NpgsqlCommand command = new NpgsqlCommand())
        {
            command.Connection = connection;
            command.CommandText = @"
                INSERT INTO articles (name, published_on, valence)
                VALUES (@name, @published_on, @valence)";
            command.Parameters.AddWithValue("name", linkURL);
            command.Parameters.AddWithValue("published_on", DateTime.UtcNow);
            command.Parameters.AddWithValue("valence", info.ValenceAverage);

            try
            {
                await command.ExecuteNonQueryAsync();
            }
            catch (PostgresException ex) when (ex.SqlState == "23505")
            {
                // SQLSTATE 23505 is PostgreSQL's unique_violation: the row already exists,
                // so the insert is skipped. Every other PostgresException propagates with
                // its original stack trace because it is never caught here.
            }
        }
    }

    await File.AppendAllTextAsync(
        outFileName,
        $"Article {linkURL} has total valence of {info.Valence}, number of tokens {info.NumTokens}, and average valence of {info.ValenceAverage}\n");

    return info;
}
/// <summary>
/// For every configured scraper topic: downloads the RSS feed, analyzes each linked
/// article via <c>SentimentUtils.MakeRequest</c>, consolidates the results, inserts one row
/// into the <c>sentiments</c> table, and appends a summary plus the top proper-noun,
/// positive and negative tokens to a per-topic text file.
/// </summary>
private static async Task CollectSentiments()
{
    const int TopTokenCount = 10;

    // For each scraper configuration (aka TOPIC)
    foreach (RSSScraperConfiguration topicScraper in NoiseConfigurations.ScraperTopics)
    {
        // The feed is requested by its full URL, so HttpClient.BaseAddress is not needed.
        // It must not be assigned here: HttpClient throws InvalidOperationException when
        // BaseAddress is changed after the client has already sent a request, which would
        // fail on the second topic (or the second run) with a shared client.
        string rssResponseString;
        using (var rssResponseMessage = await NoiseHttpClient.GetAsync(topicScraper.RSSURL))
        {
            rssResponseString = await rssResponseMessage.Content.ReadAsStringAsync();
        }

        // Collect the article link of every <item> under the feed's <channel> element.
        XDocument rssFeedResponseXML = XDocument.Parse(rssResponseString);
        var channel = rssFeedResponseXML.Root.Descendants().First(i => i.Name.LocalName == "channel");
        List<string> rssFeedLinks = channel.Elements()
            .Where(i => i.Name.LocalName == "item")
            .Select(item => item.Elements().First(i => i.Name.LocalName == topicScraper.LinkLocalName).Value)
            .ToList();

        TimeZoneInfo tz = TimeZoneInfo.FindSystemTimeZoneById("Pacific Standard Time");
        DateTime currentPST = TimeZoneInfo.ConvertTime(DateTime.UtcNow, tz);
        string outFile = $"{topicScraper.Topic.ToString()}_{topicScraper.Name}_{currentPST.Year}-{currentPST.Month}-{currentPST.Day}.txt";

        // Analyze each article; MakeRequest returns null for articles it cannot process.
        List<SentimentInfo> analyzedArticles = new List<SentimentInfo>();
        foreach (var linkURL in rssFeedLinks)
        {
            SentimentInfo info = await SentimentUtils.MakeRequest(linkURL, NoiseHttpClient, outFile);
            if (info != null)
            {
                analyzedArticles.Add(info);
            }
        }

        SentimentInfo consolidatedSentimentInfo = SentimentUtils.ConsolidateSentimentInfo(analyzedArticles);

        // Write sentiments to database
        using (NpgsqlConnection connection = new NpgsqlConnection(NoiseConfigurations.PostgresConnectionString))
        {
            await connection.OpenAsync();

            using (NpgsqlCommand command = new NpgsqlCommand())
            {
                command.Connection = connection;
                command.CommandText = @"
                    INSERT INTO sentiments (type, date, valence, domain)
                    VALUES (@type, @date, @valence, @domain)";
                command.Parameters.AddWithValue("type", (int)topicScraper.Topic);
                command.Parameters.AddWithValue("date", currentPST);
                command.Parameters.AddWithValue("valence", consolidatedSentimentInfo.ValenceAverage);
                command.Parameters.AddWithValue("domain", topicScraper.Name);

                try
                {
                    await command.ExecuteNonQueryAsync();
                }
                catch (PostgresException ex) when (ex.SqlState == "23505")
                {
                    // SQLSTATE 23505 is PostgreSQL's unique_violation. Other
                    // PostgresExceptions are not caught and keep their stack trace.
                    Console.WriteLine($"A sentiment has already been collected for {topicScraper.Topic.ToString()} on {currentPST}");
                }
            }
        }

        // Write sentiment info to file (awaited rather than blocked on with Wait()).
        await File.AppendAllTextAsync(outFile,
            $"\n{topicScraper.Topic.ToString()} topic for {currentPST} yielded total valence of {consolidatedSentimentInfo.Valence}, " +
            $"number of tokens {consolidatedSentimentInfo.NumTokens}, " +
            $"and average valence of {consolidatedSentimentInfo.ValenceAverage}\n\n");

        await AppendTopTokensAsync(outFile, "Proper nouns:\n", consolidatedSentimentInfo.ProperNounTokens);
        await AppendTopTokensAsync(outFile, "Positive tokens:\n", consolidatedSentimentInfo.PositiveTokens);
        await AppendTopTokensAsync(outFile, "Negative tokens:\n", consolidatedSentimentInfo.NegativeTokens);
    }

    // Appends a heading, the highest-count tokens (one "token count" line each, at most
    // TopTokenCount of them, sorted by descending count), and a trailing blank line.
    async Task AppendTopTokensAsync(string path, string heading, IEnumerable<KeyValuePair<string, int>> counts)
    {
        List<KeyValuePair<string, int>> ranked = counts.ToList();
        ranked.Sort((p1, p2) => p2.Value.CompareTo(p1.Value));

        await File.AppendAllTextAsync(path, heading);
        await File.AppendAllLinesAsync(path, ranked.Take(TopTokenCount).Select(s => $"{s.Key} {s.Value}"));
        await File.AppendAllTextAsync(path, "\n");
    }
}