/// <summary>
/// Records a manual sentiment rating for one raw tweet and feeds its text to the training corpus.
/// The tweet's own text (if any) and every retweeted segment are passed to
/// <c>Processor.ProcessTweetTextForCorpus</c>. When the tweet consists only of retweets, stored tweets
/// whose text equals a retweeted segment and whose <c>CorpusSentimentTimestamp</c> is still
/// <c>DateTime.MinValue</c> receive the same rating.
/// </summary>
/// <param name="id">Id of the raw tweet being rated.</param>
/// <param name="sentiment">Integer sentiment code; converted with <c>Processor.GetSentimentEnum</c> and stored on the tweet.</param>
/// <returns>The "Index" view, populated with the tweet returned by <c>GetNextTweet</c>.</returns>
/// <exception cref="InvalidOperationException">No raw tweet has the given <paramref name="id"/>.</exception>
public ActionResult RateTweet(long id, int sentiment)
        {
            var timestamp = DateTime.UtcNow;

            Tweet          tweet = OclumenContext.RawTweets.First(x => x.id == id);
            String         originalTweet;
            IList <string> retweets = TwitterTextUtility.GetRetweets(tweet.text, out originalTweet);

            // Resolve the enum once; every corpus update below uses the same value.
            var sentimentValue = Processor.GetSentimentEnum(sentiment);

            // An empty "original" part means the tweet was nothing but retweets.
            bool isRetweetOnly = String.IsNullOrEmpty(originalTweet);

            if (!isRetweetOnly)
            {
                Processor.ProcessTweetTextForCorpus(originalTweet, false, sentimentValue,
                                                    timestamp, Dictionary, OclumenContext);
            }

            OclumenContext.SaveChanges();

            // now process retweets
            foreach (String retweetedString in retweets)
            {
                Processor.ProcessTweetTextForCorpus(retweetedString, true, sentimentValue,
                                                    timestamp, Dictionary, OclumenContext);
            }

            // keep track of the sentiment used and when the manual training was actually done
            tweet.CorpusSentiment          = sentiment;
            tweet.CorpusSentimentTimestamp = timestamp;

            // if the tweet was just a retweet, update all the other matching, not-yet-rated tweets in the database
            if (isRetweetOnly)
            {
                foreach (String retweetedString in retweets)
                {
                    String tweetText = retweetedString;

                    // Materialize before looping: the loop body hands the same context to the processor
                    // and modifies the entities, so the query must not still be streaming results.
                    List <Tweet> matchingTweets =
                        OclumenContext.RawTweets
                                      .Where(x => x.text == tweetText && x.CorpusSentimentTimestamp == DateTime.MinValue)
                                      .ToList();

                    foreach (Tweet matchingTweet in matchingTweets)
                    {
                        Processor.ProcessTweetTextForCorpus(matchingTweet.text, true, sentimentValue,
                                                            timestamp, Dictionary, OclumenContext);
                        matchingTweet.CorpusSentiment          = sentiment;
                        matchingTweet.CorpusSentimentTimestamp = timestamp;
                    }
                }
            }

            OclumenContext.SaveChanges();

            var nextTweet = GetNextTweet();

            return(View("Index", UpdateClassifier(new TrainerModel {
                CurrentTweet = nextTweet
            })));
        }
예제 #2
0
        /// <summary>
        /// Classifies a piece of text, picking the tweet or the retweet corpus counts automatically:
        /// the retweet counts are used when the text has no original (non-retweeted) part.
        /// </summary>
        /// <typeparam name="TEntity">The n-gram entity type; must derive from <c>NgramBase</c>.</typeparam>
        /// <param name="text">The text to classify.</param>
        /// <param name="ngramCardinality">The ngram cardinality.</param>
        /// <param name="smoothingFactor">The smoothing factor.</param>
        /// <param name="isStemmed">if set to <c>true</c> [is stemmed].</param>
        /// <param name="dictionary">The dictionary used by the stemmer.</param>
        /// <param name="ngramDbSet">The ngram db set.</param>
        /// <param name="oclumenContext">The oclumen context.</param>
        /// <param name="ngramDictionary">Optional n-gram lookup, forwarded unchanged to the overload that does the classification; defaults to <c>null</c>.</param>
        /// <returns>The sentiment produced by the overload that takes an explicit retweet flag.</returns>
        public Sentiment GetTextSentiment <TEntity>(string text, int ngramCardinality, decimal smoothingFactor, bool isStemmed, IDictionary <String, String> dictionary,
                                                    DbSet <TEntity> ngramDbSet, IOclumenContext oclumenContext, Dictionary <string, List <KeyValuePair <Sentiment, decimal> > > ngramDictionary = null) where TEntity : NgramBase
        {
            String ownText;

            // Only the out value matters here; the list of retweeted segments is not needed.
            TwitterTextUtility.GetRetweets(text, out ownText);

            // No original part left over means the text is purely a retweet.
            bool useRetweetCounts = ownText == String.Empty;

            return GetTextSentiment(text, useRetweetCounts, ngramCardinality, smoothingFactor, isStemmed, dictionary, ngramDbSet, oclumenContext, ngramDictionary);
        }
예제 #3
0
        /// <summary>
        /// Splits a tweet into its original part and one retweet, then checks that
        /// <c>TwitterTextUtility.GetHashtags</c> finds the expected hashtags in each part.
        /// </summary>
        public void TestGetHashtagsFromRetweets()
        {
            // the hashtag #youjustpulledanobama was popular at the time so I used it, no political reason behind it
            String originalTweet;
            var    results = TwitterTextUtility.GetRetweets("truth #youjustpulledanobama#test RT @obama I am the president #testtwo #helloworld #youjustpulledanobama", out originalTweet);

            // Check the retweet count first so a wrong split fails here instead of on results[0] below.
            Assert.AreEqual(1, results.Count);

            var hashtagResults = TwitterTextUtility.GetHashtags(originalTweet);

            Assert.AreEqual(2, hashtagResults.Count);
            Assert.IsTrue(hashtagResults.Contains("#youjustpulledanobama"));
            Assert.IsTrue(hashtagResults.Contains("#test"));

            hashtagResults = TwitterTextUtility.GetHashtags(results[0]);

            Assert.AreEqual(3, hashtagResults.Count);
            Assert.IsTrue(hashtagResults.Contains("#testtwo"));
            Assert.IsTrue(hashtagResults.Contains("#helloworld"));
            Assert.IsTrue(hashtagResults.Contains("#youjustpulledanobama"));
        }
예제 #4
0
        /// <summary>
        /// Checks <c>TwitterTextUtility.GetRetweets</c> against an empty string, a tweet with one
        /// retweet after an original part, and a tweet made up of three chained retweets.
        /// </summary>
        public void TestGetRetweets()
        {
            String originalTweet;

            Assert.AreEqual(0, TwitterTextUtility.GetRetweets("", out originalTweet).Count);

            var results = TwitterTextUtility.GetRetweets("truth RT @obama I am the president", out originalTweet);

            // Expected value goes first so a failure message reports expected/actual the right way round.
            Assert.AreEqual(1, results.Count);
            Assert.AreEqual("I am the president", results[0]);
            Assert.AreEqual("truth", originalTweet);

            results = TwitterTextUtility.GetRetweets("RT @obama hello world RT @matt i like dogs RT @cnn some news", out originalTweet);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("hello world", results[0]);
            Assert.AreEqual("i like dogs", results[1]);
            Assert.AreEqual("some news", results[2]);
            Assert.AreEqual(String.Empty, originalTweet);
        }
예제 #5
0
        /// <summary>
        /// Checks <c>TwitterTextUtility.GetHashtags</c> against text without hashtags, a single hashtag,
        /// hashtags that touch each other, and hashtags mixed with ordinary words.
        /// </summary>
        public void TestGetHashtags()
        {
            Assert.AreEqual(0, TwitterTextUtility.GetHashtags("hello world, no hashtags to see here").Count);

            var results = TwitterTextUtility.GetHashtags("#helloworld");

            // Expected value goes first so a failure message reports expected/actual the right way round.
            Assert.AreEqual(1, results.Count);
            Assert.AreEqual("#helloworld", results[0]);

            results = TwitterTextUtility.GetHashtags("#one #two#three");
            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("#one", results[0]);
            Assert.AreEqual("#two", results[1]);
            Assert.AreEqual("#three", results[2]);

            results = TwitterTextUtility.GetHashtags("#test this is a #one hi #two#three");
            Assert.AreEqual(4, results.Count);
            Assert.AreEqual("#test", results[0]);
            Assert.AreEqual("#one", results[1]);
            Assert.AreEqual("#two", results[2]);
            Assert.AreEqual("#three", results[3]);
        }
예제 #6
0
        /// <summary>
        /// Worker loop that automatically classifies raw tweets.
        /// It first loads the unigram and bigram counts (basic and stemmed) into in-memory dictionaries
        /// keyed by the lower-cased n-gram text; retweet counts are stored under the same key with an
        /// "_rt" suffix. Then, while <c>CrawlerStatus.KeepRunning</c> is set, it takes a raw tweet whose
        /// <c>AutoSentimentTimestamp</c> is still <c>DateTime.MinValue</c>, queues its hashtags and
        /// retweets, stores the four classifier results on it and saves. When no such tweet exists it
        /// sleeps for <c>AppConfigSettings.ProcessTweetThreadSleepTime</c> and tries again.
        /// </summary>
        public static void TweetClassifier()
        {
            var       context         = new OclumenContext();
            var       classifier      = new NaiveBayes();
            const int smoothingFactor = 1;

            var unigrams   = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var unigramsSt = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var bigrams    = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var bigramsSt  = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();

            foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 1))
            {
                var key = ngram.Text.ToLower();

                unigrams.Add(key, BuildSmoothedCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                unigrams.Add(key + "_rt", BuildSmoothedCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 1))
            {
                var key = ngram.Text.ToLower();

                unigramsSt.Add(key, BuildSmoothedCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                unigramsSt.Add(key + "_rt", BuildSmoothedCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 2))
            {
                var key = ngram.Text.ToLower();

                bigrams.Add(key, BuildSmoothedCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                bigrams.Add(key + "_rt", BuildSmoothedCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 2))
            {
                var key = ngram.Text.ToLower();

                bigramsSt.Add(key, BuildSmoothedCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                bigramsSt.Add(key + "_rt", BuildSmoothedCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

            while (CrawlerStatus.KeepRunning)
            {
                // FirstOrDefault, not First: First throws when nothing is pending, which made the
                // null check (and the sleep) below unreachable.
                var tweetToProcess = context.RawTweets.FirstOrDefault(x => x.AutoSentimentTimestamp == DateTime.MinValue);

                if (tweetToProcess == null)
                {
                    Thread.Sleep(sleepTime);
                    continue;
                }

                // add the hashtags and retweets to the process queues
                String originalTweet;
                var    hashtags = TwitterTextUtility.GetHashtags(tweetToProcess.text);
                var    retweets = TwitterTextUtility.GetRetweets(tweetToProcess.text, out originalTweet);

                lock (_syncRoot)
                {
                    foreach (var hashtag in hashtags)
                    {
                        UnprocessedHashtags.Enqueue(hashtag);
                    }

                    foreach (var retweet in retweets)
                    {
                        UnprocessedRetweets.Enqueue(retweet);
                    }
                }

                // auto classify the tweets
                tweetToProcess.AutoUnigram        = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, smoothingFactor, false, Dictionary, context.BasicNgrams, context, unigrams);
                tweetToProcess.AutoUnigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, smoothingFactor, true, Dictionary, context.StemmedNgrams, context, unigramsSt);

                tweetToProcess.AutoBigram        = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, smoothingFactor, false, Dictionary, context.BasicNgrams, context, bigrams);
                tweetToProcess.AutoBigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, smoothingFactor, true, Dictionary, context.StemmedNgrams, context, bigramsSt);

                // Stamping the tweet takes it out of the "pending" query above.
                tweetToProcess.AutoSentimentTimestamp = DateTime.UtcNow;

                context.SaveChanges();
            }
        }

        /// <summary>
        /// Builds the three-entry list (positive, neutral, negative) of counts, each with the smoothing factor added.
        /// </summary>
        private static List <KeyValuePair <Sentiment, decimal> > BuildSmoothedCounts(decimal positiveCount, decimal neutralCount, decimal negativeCount, decimal smoothingFactor)
        {
            return new List <KeyValuePair <Sentiment, decimal> >(3)
            {
                new KeyValuePair <Sentiment, decimal>(Sentiment.Positive, positiveCount + smoothingFactor),
                new KeyValuePair <Sentiment, decimal>(Sentiment.Neutral, neutralCount + smoothingFactor),
                new KeyValuePair <Sentiment, decimal>(Sentiment.Negative, negativeCount + smoothingFactor)
            };
        }