/// <summary>
/// Deserializes one JSON status message into a <c>Tweet</c> and stores it, together
/// with its author and optional geo location, through a short-lived <c>OclumenContext</c>.
/// </summary>
/// <param name="tweet">JSON text of a single status message; encoded as UTF-8 before parsing.</param>
/// <param name="printTweet">When true, writes the author's name and the tweet text to the console.</param>
/// <remarks>
/// Processing is best-effort: any exception raised while parsing or saving is caught
/// and only its message is written to the console.
/// </remarks>
public static void MessageProcess(String tweet, bool printTweet)
{
    var json = new DataContractJsonSerializer(typeof(Tweet));

    try
    {
        Tweet status;
        using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(tweet)))
        {
            status = json.ReadObject(stream) as Tweet;
        }

        // Without a status and its user nothing below can work; report it explicitly
        // instead of surfacing an opaque NullReferenceException message.
        if (status == null || status.user == null)
        {
            Console.WriteLine("Skipping message: no tweet or user could be read from it.");
            return;
        }

        if (printTweet)
        {
            // keep the three output lines together when several threads print
            lock (_syncRootOutput)
            {
                Console.WriteLine("");
                Console.WriteLine(status.user.name + " tweeted: " + status.text);
                Console.WriteLine("");
            }
        }

        using (var context = new OclumenContext())
        {
            var userId = status.user.id;
            var account = context.TwitterAccounts.FirstOrDefault(x => x.id == userId);

            if (account == null)
            {
                // First tweet seen from this user: persist the account before the tweet.
                // After SaveChanges the added instance stays attached to this context.
                context.TwitterAccounts.Add(status.user);
                context.SaveChanges();
            }
            else
            {
                // Point the tweet at the already-tracked account; otherwise adding the
                // tweet would also add the deserialized user as a brand-new account.
                // NOTE(review): confirm against the TwitterAccount key configuration.
                status.user = account;
            }

            // MinValue marks the tweet as not yet classified / not yet manually rated
            status.AutoSentimentTimestamp = DateTime.MinValue;
            status.CorpusSentimentTimestamp = DateTime.MinValue;

            context.RawTweets.Add(status);

            if (status.geo != null)
            {
                status.geo = context.Locations.Add(status.geo);
            }

            context.SaveChanges();
        }
    }
    catch (Exception ex)
    {
        // deliberate best-effort: a bad message must not stop the caller
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Records a manual (corpus) sentiment rating for a tweet, feeds the tweet text and any
/// retweeted text it contains to the training corpus, and shows the next tweet.
/// </summary>
/// <param name="id">Identifier of the tweet that was rated.</param>
/// <param name="sentiment">Numeric sentiment value; converted with <c>Processor.GetSentimentEnum</c>.</param>
/// <returns>The "Index" view with a refreshed <c>TrainerModel</c> holding the next tweet.</returns>
public ActionResult RateTweet(long id, int sentiment)
{
    var timestamp = DateTime.UtcNow;

    // FirstOrDefault rather than First: a repeated or stale post may reference a tweet
    // that is no longer stored; in that case simply move on to the next tweet.
    Tweet tweet = OclumenContext.RawTweets.FirstOrDefault(x => x.id == id);
    if (tweet != null)
    {
        ApplyCorpusRating(tweet, sentiment, timestamp);
    }

    var nextTweet = GetNextTweet();
    return View("Index", UpdateClassifier(new TrainerModel { CurrentTweet = nextTweet }));
}

/// <summary>
/// Trains the corpus from one rated tweet and stamps the rating on the tweet and,
/// for pure retweets, on every stored unrated tweet whose text equals a retweeted string.
/// </summary>
private void ApplyCorpusRating(Tweet tweet, int sentiment, DateTime timestamp)
{
    String originalTweet;
    IList<string> retweets = TwitterTextUtility.GetRetweets(tweet.text, out originalTweet);

    // an empty "original" part means the tweet consisted only of retweeted text
    bool isPureRetweet = originalTweet.Equals(String.Empty);

    if (!isPureRetweet)
    {
        Processor.ProcessTweetTextForCorpus(originalTweet, false, Processor.GetSentimentEnum(sentiment),
                                            timestamp, Dictionary, OclumenContext);
    }

    OclumenContext.SaveChanges();

    // now process retweets
    foreach (String retweetedString in retweets)
    {
        Processor.ProcessTweetTextForCorpus(retweetedString, true, Processor.GetSentimentEnum(sentiment),
                                            timestamp, Dictionary, OclumenContext);
    }

    // keep track of the sentiment used and when the manual training was actually done
    tweet.CorpusSentiment = sentiment;
    tweet.CorpusSentimentTimestamp = timestamp;

    // if the original tweet was just a retweet, update all the other possible retweets in the database
    if (isPureRetweet)
    {
        foreach (String retweetedString in retweets)
        {
            String tweetText = retweetedString;

            // Materialize first: the loop body mutates the entities and hands the same
            // context to the processor, which should not happen while the query is
            // still being streamed.
            List<Tweet> sameTweets = OclumenContext.RawTweets
                .Where(x => x.text == tweetText && x.CorpusSentimentTimestamp == DateTime.MinValue)
                .ToList();

            foreach (Tweet tempTweet in sameTweets)
            {
                Processor.ProcessTweetTextForCorpus(tempTweet.text, true, Processor.GetSentimentEnum(sentiment),
                                                    timestamp, Dictionary, OclumenContext);
                tempTweet.CorpusSentiment = sentiment;
                tempTweet.CorpusSentimentTimestamp = timestamp;
            }
        }
    }

    OclumenContext.SaveChanges();
}
/// <summary>
/// Deletes the tweet with the given id and every other stored tweet that has exactly
/// the same text, then shows the next tweet.
/// </summary>
/// <param name="id">Identifier of the tweet to delete.</param>
/// <returns>The "Index" view with a <c>TestModel</c> holding the next tweet.</returns>
public ActionResult Delete(long id)
{
    // FirstOrDefault rather than First: a repeated delete request references a tweet
    // that is already gone and should not fail the request.
    Tweet target = OclumenContext.RawTweets.FirstOrDefault(x => x.id == id);

    if (target != null)
    {
        string tweetText = target.text;

        // materialize before removing so the set is not changed while the query is read
        List<Tweet> foundTweets = OclumenContext.RawTweets.Where(x => x.text == tweetText).ToList();
        foreach (Tweet tweet in foundTweets)
        {
            OclumenContext.RawTweets.Remove(tweet);
        }

        OclumenContext.SaveChanges();
    }

    // fetch the next tweet once (the previous code fetched it twice and discarded the first)
    Tweet nextTweet = GetNextTweet();
    return View("Index", new TestModel { CurrentTweet = nextTweet });
}
/// <summary>
/// Stores a test sentiment rating on the tweet with the given id (when it exists)
/// and shows the next tweet.
/// </summary>
/// <param name="id">Identifier of the tweet that was rated.</param>
/// <param name="sentiment">Numeric sentiment value written to <c>TestSentiment</c>.</param>
/// <returns>The "Index" view with a <c>TestModel</c> holding the next tweet.</returns>
public ActionResult RateTweet(long id, int sentiment)
{
    var ratedAt = DateTime.UtcNow;
    var rated = OclumenContext.RawTweets.FirstOrDefault(candidate => candidate.id == id);

    // an unknown id is ignored; the save below then has nothing to write for it
    if (rated != null)
    {
        rated.TestSentimentTimestamp = ratedAt;
        rated.TestSentiment = sentiment;
    }

    OclumenContext.SaveChanges();

    var model = new TestModel();
    model.CurrentTweet = GetNextTweet();
    return View("Index", model);
}
/// <summary>
/// Deletes the tweet with the given id and every other stored tweet that has exactly
/// the same text, then shows the next tweet to train on.
/// </summary>
/// <param name="id">Identifier of the tweet to delete.</param>
/// <returns>The "Index" view with a refreshed <c>TrainerModel</c> holding the next tweet.</returns>
public ActionResult Delete(long id)
{
    // FirstOrDefault rather than First: a repeated delete request references a tweet
    // that is already gone and should not fail the request.
    var target = OclumenContext.RawTweets.FirstOrDefault(x => x.id == id);

    if (target != null)
    {
        var tweetText = target.text;

        // materialize before removing so the set is not changed while the query is read
        var foundTweets = OclumenContext.RawTweets.Where(x => x.text == tweetText).ToList();
        foreach (var tweet in foundTweets)
        {
            OclumenContext.RawTweets.Remove(tweet);
        }

        OclumenContext.SaveChanges();
    }

    var nextTweet = GetNextTweet();
    return View("Index", UpdateClassifier(new TrainerModel { CurrentTweet = nextTweet }));
}
/// <summary>
/// Consumer loop: while <c>CrawlerStatus.KeepRunning</c> is true, drains the shared
/// hashtag and retweet queues and stores one use record per entry.
/// </summary>
/// <remarks>
/// Sleeps for <c>AppConfigSettings.ProcessTweetThreadSleepTime</c> whenever both queues
/// were empty. Each batch is written through its own context so the context is disposed
/// and does not keep tracking every record saved over the lifetime of the thread.
/// </remarks>
public static void HashtagRetweetProcessor()
{
    var localUnprocessedHashtags = new Queue<string>();
    var localUnprocessedRetweets = new Queue<string>();
    int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

    while (CrawlerStatus.KeepRunning)
    {
        // grab from the producer queues and put into our own local consumer queues,
        // holding the shared lock only for the copy
        lock (_syncRoot)
        {
            while (UnprocessedHashtags.Count != 0)
            {
                localUnprocessedHashtags.Enqueue(UnprocessedHashtags.Dequeue());
            }

            while (UnprocessedRetweets.Count != 0)
            {
                localUnprocessedRetweets.Enqueue(UnprocessedRetweets.Dequeue());
            }
        }

        // if we didn't get anything from either queue, sleep for a bit
        if (localUnprocessedHashtags.Count == 0 && localUnprocessedRetweets.Count == 0)
        {
            Thread.Sleep(sleepTime);
            continue;
        }

        // every record of one batch shares the same timestamp
        var now = DateTime.UtcNow;

        using (var context = new OclumenContext())
        {
            while (localUnprocessedHashtags.Count != 0)
            {
                context.HashtagUseRecords.Add(new HashtagUseRecord
                {
                    Tag = localUnprocessedHashtags.Dequeue(),
                    Timestamp = now
                });
            }

            while (localUnprocessedRetweets.Count != 0)
            {
                var currentRetweet = StripRetweetPrefix(localUnprocessedRetweets.Dequeue());

                // sometimes the retweet extraction produces just ":" which strips down to nothing
                if (currentRetweet != String.Empty)
                {
                    context.RetweetUseRecords.Add(new RetweetUseRecord
                    {
                        Tag = currentRetweet,
                        Timestamp = now
                    });
                }
            }

            context.SaveChanges();
        }
    }
}

/// <summary>
/// Removes a leading ":" and then a leading "RT " from a retweet string, trimming
/// surrounding whitespace after each removal.
/// </summary>
private static string StripRetweetPrefix(string retweet)
{
    if (retweet.StartsWith(":", StringComparison.Ordinal))
    {
        retweet = retweet.Substring(1).Trim();
    }

    if (retweet.StartsWith("RT ", StringComparison.Ordinal))
    {
        retweet = retweet.Substring(3).Trim();
    }

    return retweet;
}
/// <summary>
/// Classifier loop: loads smoothed unigram and bigram counts (basic and stemmed) into
/// memory, then, while <c>CrawlerStatus.KeepRunning</c> is true, takes one tweet whose
/// <c>AutoSentimentTimestamp</c> is still <c>DateTime.MinValue</c>, queues its hashtags
/// and retweets for the use-record processor, and stores four automatic sentiment values.
/// </summary>
/// <remarks>
/// Sleeps for <c>AppConfigSettings.ProcessTweetThreadSleepTime</c> when no tweet is pending.
/// The n-gram tables are read once at start-up and are not refreshed afterwards.
/// </remarks>
public static void TweetClassifier()
{
    using (var context = new OclumenContext())
    {
        var classifier = new NaiveBayes();
        int smoothingFactor = 1; // added to every count so no sentiment has a zero count

        var unigrams = new Dictionary<string, List<KeyValuePair<Sentiment, decimal>>>();
        var unigramsSt = new Dictionary<string, List<KeyValuePair<Sentiment, decimal>>>();
        var bigrams = new Dictionary<string, List<KeyValuePair<Sentiment, decimal>>>();
        var bigramsSt = new Dictionary<string, List<KeyValuePair<Sentiment, decimal>>>();

        // Each n-gram yields two entries: its plain counts under the lower-cased text
        // and its retweet counts under the same key with an "_rt" suffix.
        foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 1))
        {
            var key = ngram.Text.ToLower();
            unigrams.Add(key, BuildSentimentCounts(ngram.PositiveCount + smoothingFactor, ngram.NeutralCount + smoothingFactor, ngram.NegativeCount + smoothingFactor));
            unigrams.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount + smoothingFactor, ngram.RtNeutralCount + smoothingFactor, ngram.RtNegativeCount + smoothingFactor));
        }

        foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 1))
        {
            var key = ngram.Text.ToLower();
            unigramsSt.Add(key, BuildSentimentCounts(ngram.PositiveCount + smoothingFactor, ngram.NeutralCount + smoothingFactor, ngram.NegativeCount + smoothingFactor));
            unigramsSt.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount + smoothingFactor, ngram.RtNeutralCount + smoothingFactor, ngram.RtNegativeCount + smoothingFactor));
        }

        foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 2))
        {
            var key = ngram.Text.ToLower();
            bigrams.Add(key, BuildSentimentCounts(ngram.PositiveCount + smoothingFactor, ngram.NeutralCount + smoothingFactor, ngram.NegativeCount + smoothingFactor));
            bigrams.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount + smoothingFactor, ngram.RtNeutralCount + smoothingFactor, ngram.RtNegativeCount + smoothingFactor));
        }

        foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 2))
        {
            var key = ngram.Text.ToLower();
            bigramsSt.Add(key, BuildSentimentCounts(ngram.PositiveCount + smoothingFactor, ngram.NeutralCount + smoothingFactor, ngram.NegativeCount + smoothingFactor));
            bigramsSt.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount + smoothingFactor, ngram.RtNeutralCount + smoothingFactor, ngram.RtNegativeCount + smoothingFactor));
        }

        int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

        while (CrawlerStatus.KeepRunning)
        {
            // FirstOrDefault, not First: First throws when nothing is pending, which
            // made the sleep branch below unreachable and ended the loop with an exception.
            var tweetToProcess = context.RawTweets.FirstOrDefault(x => x.AutoSentimentTimestamp == DateTime.MinValue);

            if (tweetToProcess == null)
            {
                Thread.Sleep(sleepTime);
                continue;
            }

            // add the hashtags and retweets to the shared process queues
            String originalTweet;
            var hashtags = TwitterTextUtility.GetHashtags(tweetToProcess.text);
            var retweets = TwitterTextUtility.GetRetweets(tweetToProcess.text, out originalTweet);

            lock (_syncRoot)
            {
                foreach (var hashtag in hashtags)
                {
                    UnprocessedHashtags.Enqueue(hashtag);
                }

                foreach (var retweet in retweets)
                {
                    UnprocessedRetweets.Enqueue(retweet);
                }
            }

            // auto classify the tweet with each of the four models
            tweetToProcess.AutoUnigram = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, false, Dictionary, context.BasicNgrams, context, unigrams);
            tweetToProcess.AutoUnigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, true, Dictionary, context.StemmedNgrams, context, unigramsSt);
            tweetToProcess.AutoBigram = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, false, Dictionary, context.BasicNgrams, context, bigrams);
            tweetToProcess.AutoBigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, true, Dictionary, context.StemmedNgrams, context, bigramsSt);

            // a non-MinValue timestamp takes the tweet out of the pending query above
            tweetToProcess.AutoSentimentTimestamp = DateTime.UtcNow;
            context.SaveChanges();
        }
    }
}

/// <summary>
/// Builds the three-entry count list (positive, neutral, negative, in that order)
/// stored per n-gram key.
/// </summary>
private static List<KeyValuePair<Sentiment, decimal>> BuildSentimentCounts(decimal positive, decimal neutral, decimal negative)
{
    return new List<KeyValuePair<Sentiment, decimal>>(3)
    {
        new KeyValuePair<Sentiment, decimal>(Sentiment.Positive, positive),
        new KeyValuePair<Sentiment, decimal>(Sentiment.Neutral, neutral),
        new KeyValuePair<Sentiment, decimal>(Sentiment.Negative, negative)
    };
}