/// <summary>
/// Records a manual sentiment rating for the given raw tweet and feeds its text
/// (the original portion and any embedded retweets) into the training corpus.
/// </summary>
/// <param name="id">Database id of the tweet being rated.</param>
/// <param name="sentiment">Integer sentiment code; converted via Processor.GetSentimentEnum.</param>
/// <returns>The Index view populated with the next tweet to be manually trained.</returns>
public ActionResult RateTweet(long id, int sentiment)
{
    // single timestamp so the tweet and all matching retweets get the same training time
    var timestamp = DateTime.UtcNow;

    // NOTE(review): First() throws InvalidOperationException if the id is unknown —
    // confirm callers only ever pass ids of existing raw tweets.
    Tweet tweet = OclumenContext.RawTweets.First(x => x.id == id);

    String originalTweet;
    IList<string> retweets = TwitterTextUtility.GetRetweets(tweet.text, out originalTweet);

    // if the original tweet was not just a retweet, train on the non-retweeted portion
    // (isRetweet = false so the plain corpus counts are updated)
    if (!originalTweet.Equals(String.Empty))
    {
        Processor.ProcessTweetTextForCorpus(originalTweet, false, Processor.GetSentimentEnum(sentiment), timestamp, Dictionary, OclumenContext);
    }

    OclumenContext.SaveChanges();

    // now process retweets (isRetweet = true so the retweet corpus counts are updated)
    foreach (String retweetedString in retweets)
    {
        Processor.ProcessTweetTextForCorpus(retweetedString, true, Processor.GetSentimentEnum(sentiment), timestamp, Dictionary, OclumenContext);
    }

    // keep track of the sentiment used and when the manual training was actually done
    tweet.CorpusSentiment = sentiment;
    tweet.CorpusSentimentTimestamp = timestamp;

    // if the original tweet was just a retweet, lets update all the other possible
    // retweets in the database so the same text is not manually trained twice
    if (originalTweet.Equals(String.Empty))
    {
        foreach (String retweetedString in retweets)
        {
            // copy the loop variable to a local so the EF query captures a stable value
            String tweetText = retweetedString;

            // CorpusSentimentTimestamp == DateTime.MinValue marks tweets not yet trained
            IQueryable<Tweet> sameTweet = OclumenContext.RawTweets.Where(x => x.text == tweetText && x.CorpusSentimentTimestamp == DateTime.MinValue);

            foreach (Tweet tempTweet in sameTweet)
            {
                Processor.ProcessTweetTextForCorpus(tempTweet.text, true, Processor.GetSentimentEnum(sentiment), timestamp, Dictionary, OclumenContext);
                tempTweet.CorpusSentiment = sentiment;
                tempTweet.CorpusSentimentTimestamp = timestamp;
            }
        }
    }

    OclumenContext.SaveChanges();

    // hand the trainer the next tweet to rate
    var nextTweet = GetNextTweet();
    return(View("Index", UpdateClassifier(new TrainerModel { CurrentTweet = nextTweet })));
}
/// <summary>
/// Gets the text sentiment, automatically deciding whether to use the tweet or
/// the retweet corpus counts: when stripping retweets leaves no original text,
/// the message was purely a retweet and the retweet counts are used.
/// </summary>
/// <typeparam name="TEntity">The ngram entity type.</typeparam>
/// <param name="text">The text to classify.</param>
/// <param name="ngramCardinality">The ngram cardinality.</param>
/// <param name="smoothingFactor">The smoothing factor.</param>
/// <param name="isStemmed">if set to <c>true</c> the stemmed corpus is used.</param>
/// <param name="dictionary">The dictionary used by the stemmer.</param>
/// <param name="ngramDbSet">The ngram db set.</param>
/// <param name="oclumenContext">The oclumen context.</param>
/// <param name="ngramDictionary">Optional pre-built in-memory ngram lookup.</param>
/// <returns>The computed sentiment.</returns>
public Sentiment GetTextSentiment<TEntity>(string text, int ngramCardinality, decimal smoothingFactor, bool isStemmed, IDictionary<String, String> dictionary, DbSet<TEntity> ngramDbSet, IOclumenContext oclumenContext, Dictionary<string, List<KeyValuePair<Sentiment, decimal>>> ngramDictionary = null) where TEntity : NgramBase
{
    String strippedText;
    TwitterTextUtility.GetRetweets(text, out strippedText);

    // empty stripped text => the tweet consisted entirely of retweets
    bool useRetweetCounts = strippedText == String.Empty;

    return GetTextSentiment(text, useRetweetCounts, ngramCardinality, smoothingFactor, isStemmed, dictionary, ngramDbSet, oclumenContext, ngramDictionary);
}
/// <summary>
/// Verifies that GetHashtags finds hashtags separately in the original portion and
/// in the retweeted portion returned by GetRetweets, including hashtags written
/// back-to-back with no whitespace and the same hashtag appearing in both portions.
/// </summary>
public void TestGetHashtagsFromRetweets()
{
    // the hashtag #youjustpulledanobama was popular at the time so I used it, no political reason behind it
    String originalTweet;
    var results = TwitterTextUtility.GetRetweets("truth #youjustpulledanobama#test RT @obama I am the president #testtwo #helloworld #youjustpulledanobama", out originalTweet);

    // hashtags in the original (non-retweeted) text
    // FIX: Assert.AreEqual takes (expected, actual); counts asserted directly for clearer failures
    var hashtagResults = TwitterTextUtility.GetHashtags(originalTweet);
    Assert.IsTrue(hashtagResults.Contains("#youjustpulledanobama"));
    Assert.IsTrue(hashtagResults.Contains("#test"));
    Assert.AreEqual(2, hashtagResults.Count);

    // hashtags in the retweeted text
    hashtagResults = TwitterTextUtility.GetHashtags(results[0]);
    Assert.IsTrue(hashtagResults.Contains("#testtwo"));
    Assert.IsTrue(hashtagResults.Contains("#helloworld"));
    Assert.IsTrue(hashtagResults.Contains("#youjustpulledanobama"));
    Assert.AreEqual(3, hashtagResults.Count);

    Assert.AreEqual(1, results.Count);
}
/// <summary>
/// Verifies GetRetweets: empty input, a single "RT @user" with leading original text,
/// and multiple chained retweets with no original text (empty out parameter).
/// </summary>
public void TestGetRetweets()
{
    String originalTweet;

    // empty input yields no retweets
    // FIX: Assert.AreEqual takes (expected, actual); the original calls had them reversed
    Assert.AreEqual(0, TwitterTextUtility.GetRetweets("", out originalTweet).Count);

    // a single retweet: the text before "RT @user" is the original tweet
    var results = TwitterTextUtility.GetRetweets("truth RT @obama I am the president", out originalTweet);
    Assert.AreEqual(1, results.Count);
    Assert.AreEqual("I am the president", results[0]);
    Assert.AreEqual("truth", originalTweet);

    // multiple chained retweets with nothing before the first "RT"
    results = TwitterTextUtility.GetRetweets("RT @obama hello world RT @matt i like dogs RT @cnn some news", out originalTweet);
    Assert.AreEqual(3, results.Count);
    Assert.AreEqual("hello world", results[0]);
    Assert.AreEqual("i like dogs", results[1]);
    Assert.AreEqual("some news", results[2]);
    Assert.AreEqual(String.Empty, originalTweet);
}
/// <summary>
/// Verifies GetHashtags: no hashtags, a lone hashtag, hashtags written back-to-back
/// with no separating whitespace, and hashtags mixed with plain words.
/// </summary>
public void TestGetHashtags()
{
    // FIX: Assert.AreEqual takes (expected, actual); counts asserted directly for clearer failures
    Assert.AreEqual(0, TwitterTextUtility.GetHashtags("hello world, no hashtags to see here").Count);

    var results = TwitterTextUtility.GetHashtags("#helloworld");
    Assert.AreEqual(1, results.Count);
    Assert.AreEqual("#helloworld", results[0]);

    // back-to-back hashtags with no whitespace between them are split apart
    results = TwitterTextUtility.GetHashtags("#one #two#three");
    Assert.AreEqual(3, results.Count);
    Assert.AreEqual("#one", results[0]);
    Assert.AreEqual("#two", results[1]);
    Assert.AreEqual("#three", results[2]);

    // hashtags interleaved with ordinary words keep their order of appearance
    results = TwitterTextUtility.GetHashtags("#test this is a #one hi #two#three");
    Assert.AreEqual(4, results.Count);
    Assert.AreEqual("#test", results[0]);
    Assert.AreEqual("#one", results[1]);
    Assert.AreEqual("#two", results[2]);
    Assert.AreEqual("#three", results[3]);
}
/// <summary>
/// Background worker that auto-classifies raw tweets which have not yet been
/// processed (AutoSentimentTimestamp == DateTime.MinValue). For each tweet it
/// queues any hashtags and retweets for further processing, then runs the Naive
/// Bayes classifier in four configurations (unigram/bigram, plain/stemmed) and
/// stamps the results. Runs until CrawlerStatus.KeepRunning is cleared.
/// </summary>
public static void TweetClassifier()
{
    var context = new OclumenContext();
    var classifier = new NaiveBayes();
    var smoothingFactor = 1;

    // pre-load in-memory lookups of smoothed sentiment counts for each corpus;
    // the duplicated loop bodies of the original are factored into BuildNgramLookup
    var unigrams = BuildNgramLookup(context.BasicNgrams.Where(x => x.Cardinality == 1), smoothingFactor);
    var unigramsSt = BuildNgramLookup(context.StemmedNgrams.Where(x => x.Cardinality == 1), smoothingFactor);
    var bigrams = BuildNgramLookup(context.BasicNgrams.Where(x => x.Cardinality == 2), smoothingFactor);
    var bigramsSt = BuildNgramLookup(context.StemmedNgrams.Where(x => x.Cardinality == 2), smoothingFactor);

    int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

    while (CrawlerStatus.KeepRunning)
    {
        // BUG FIX: First() throws InvalidOperationException when no unprocessed tweet
        // exists, which made the null check below dead code and crashed the worker;
        // FirstOrDefault() returns null so the worker sleeps and retries instead.
        var tweetToProcess = context.RawTweets.FirstOrDefault(x => x.AutoSentimentTimestamp == DateTime.MinValue);
        if (tweetToProcess == null)
        {
            Thread.Sleep(sleepTime);
            continue;
        }

        // add the hashtags and retweets to the shared processing queues
        String originalTweet;
        var hashtags = TwitterTextUtility.GetHashtags(tweetToProcess.text);
        var retweets = TwitterTextUtility.GetRetweets(tweetToProcess.text, out originalTweet);

        lock (_syncRoot)
        {
            foreach (var hashtag in hashtags)
            {
                UnprocessedHashtags.Enqueue(hashtag);
            }

            foreach (var retweet in retweets)
            {
                UnprocessedRetweets.Enqueue(retweet);
            }
        }

        // auto classify the tweet with each of the four classifier configurations
        tweetToProcess.AutoUnigram = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, false, Dictionary, context.BasicNgrams, context, unigrams);
        tweetToProcess.AutoUnigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, true, Dictionary, context.StemmedNgrams, context, unigramsSt);
        tweetToProcess.AutoBigram = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, false, Dictionary, context.BasicNgrams, context, bigrams);
        tweetToProcess.AutoBigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, true, Dictionary, context.StemmedNgrams, context, bigramsSt);

        // mark the tweet as processed so it is not picked up again
        tweetToProcess.AutoSentimentTimestamp = DateTime.UtcNow;
        context.SaveChanges();
    }
}

/// <summary>
/// Builds an in-memory sentiment-count lookup for a set of ngrams. Each ngram
/// contributes two entries keyed by its lowered text: one with the plain counts
/// and one, suffixed "_rt", with the retweet counts; every count is additively
/// smoothed by <paramref name="smoothingFactor"/>.
/// </summary>
/// <typeparam name="TEntity">The ngram entity type.</typeparam>
/// <param name="ngrams">The ngrams to index.</param>
/// <param name="smoothingFactor">Additive (Laplace) smoothing applied to every count.</param>
/// <returns>The lookup from ngram key to its three smoothed sentiment counts.</returns>
private static Dictionary<string, List<KeyValuePair<Sentiment, decimal>>> BuildNgramLookup<TEntity>(IEnumerable<TEntity> ngrams, int smoothingFactor) where TEntity : NgramBase
{
    var lookup = new Dictionary<string, List<KeyValuePair<Sentiment, decimal>>>();

    foreach (var ngram in ngrams)
    {
        // NOTE(review): Add throws on duplicate lowered text, matching the original
        // loops' behavior — confirm ngram text is unique per corpus/cardinality
        lookup.Add(ngram.Text.ToLower(), new List<KeyValuePair<Sentiment, decimal>>(3)
        {
            new KeyValuePair<Sentiment, decimal>(Sentiment.Positive, ngram.PositiveCount + smoothingFactor),
            new KeyValuePair<Sentiment, decimal>(Sentiment.Neutral, ngram.NeutralCount + smoothingFactor),
            new KeyValuePair<Sentiment, decimal>(Sentiment.Negative, ngram.NegativeCount + smoothingFactor)
        });

        lookup.Add(ngram.Text.ToLower() + "_rt", new List<KeyValuePair<Sentiment, decimal>>(3)
        {
            new KeyValuePair<Sentiment, decimal>(Sentiment.Positive, ngram.RtPositiveCount + smoothingFactor),
            new KeyValuePair<Sentiment, decimal>(Sentiment.Neutral, ngram.RtNeutralCount + smoothingFactor),
            new KeyValuePair<Sentiment, decimal>(Sentiment.Negative, ngram.RtNegativeCount + smoothingFactor)
        });
    }

    return lookup;
}