/// <summary>
        /// Stores a manual sentiment rating on a raw tweet and feeds the tweet's text (and the
        /// retweeted texts it contains) to <c>Processor.ProcessTweetTextForCorpus</c>.
        /// </summary>
        /// <param name="id">Id of the raw tweet being rated.</param>
        /// <param name="sentiment">Integer sentiment value; converted with <c>Processor.GetSentimentEnum</c>.</param>
        /// <returns>The "Index" view with the next tweet to rate.</returns>
        public ActionResult RateTweet(long id, int sentiment)
        {
            var timestamp = DateTime.UtcNow;

            // FirstOrDefault instead of First: a stale or repeated request for a tweet that no
            // longer exists should simply move on to the next tweet instead of throwing.
            Tweet tweet = OclumenContext.RawTweets.FirstOrDefault(x => x.id == id);

            if (tweet == null)
            {
                var fallbackTweet = GetNextTweet();

                return(View("Index", UpdateClassifier(new TrainerModel {
                    CurrentTweet = fallbackTweet
                })));
            }

            String         originalTweet;
            IList <string> retweets = TwitterTextUtility.GetRetweets(tweet.text, out originalTweet);

            // an empty original text means the tweet consisted only of retweeted content
            bool isPureRetweet = originalTweet.Equals(String.Empty);

            // the conversion does not depend on the text being processed, so do it once
            var corpusSentiment = Processor.GetSentimentEnum(sentiment);

            // if the original tweet was not just a retweet
            if (!isPureRetweet)
            {
                Processor.ProcessTweetTextForCorpus(originalTweet, false, corpusSentiment,
                                                    timestamp, Dictionary, OclumenContext);
            }

            OclumenContext.SaveChanges();

            // now process retweets (second argument is true here, false for the original text)
            foreach (String retweetedString in retweets)
            {
                Processor.ProcessTweetTextForCorpus(retweetedString, true, corpusSentiment,
                                                    timestamp, Dictionary, OclumenContext);
            }

            // keep track of the sentiment used and when the manual training was actually done
            tweet.CorpusSentiment          = sentiment;
            tweet.CorpusSentimentTimestamp = timestamp;

            // if the original tweet was just a retweet, lets update all the other possible retweets in the database
            if (isPureRetweet)
            {
                foreach (String retweetedString in retweets)
                {
                    // local copy so the lambda below captures this iteration's value
                    String tweetText = retweetedString;

                    // Materialize before looping: the loop body hands the same context to the
                    // processor, which presumably issues its own queries; doing that while this
                    // query's reader is still open can fail on providers without MARS.
                    var sameTweets =
                        OclumenContext.RawTweets.Where(
                            x => x.text == tweetText && x.CorpusSentimentTimestamp == DateTime.MinValue)
                        .ToList();

                    foreach (Tweet tempTweet in sameTweets)
                    {
                        Processor.ProcessTweetTextForCorpus(tempTweet.text, true, corpusSentiment,
                                                            timestamp, Dictionary, OclumenContext);
                        tempTweet.CorpusSentiment          = sentiment;
                        tempTweet.CorpusSentimentTimestamp = timestamp;
                    }
                }
            }

            OclumenContext.SaveChanges();

            var nextTweet = GetNextTweet();

            return(View("Index", UpdateClassifier(new TrainerModel {
                CurrentTweet = nextTweet
            })));
        }
        /// <summary>
        /// Deserializes one JSON message into a <c>Tweet</c>, optionally echoes it to the console,
        /// and stores the tweet together with its author and geo data.
        /// </summary>
        /// <param name="tweet">JSON text of a single message.</param>
        /// <param name="printTweet">When true, the author name and tweet text are written to the console.</param>
        /// <remarks>
        /// Every exception is caught and only its message is written to the console, so a bad
        /// message never propagates to the caller.
        /// </remarks>
        public static void MessageProcess(String tweet, bool printTweet)
        {
            var json = new DataContractJsonSerializer(typeof(Tweet));

            try
            {
                Tweet status;

                using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(tweet)))
                {
                    status = json.ReadObject(stream) as Tweet;
                }

                // Guard explicitly instead of relying on a NullReferenceException further down.
                if (status == null || status.user == null)
                {
                    Console.WriteLine("Skipped a message that did not deserialize into a tweet with a user.");
                    return;
                }

                if (printTweet)
                {
                    lock (_syncRootOutput)
                    {
                        Console.WriteLine("");
                        Console.WriteLine(status.user.name + " tweeted: " + status.text);
                        Console.WriteLine("");
                    }
                }

                // One context per message; dispose it so contexts do not pile up.
                // NOTE(review): assumes OclumenContext is IDisposable (e.g. derives from DbContext) - confirm.
                using (var context = new OclumenContext())
                {
                    var account = context.TwitterAccounts.FirstOrDefault(x => x.id == status.user.id);

                    if (account == null)
                    {
                        context.TwitterAccounts.Add(status.user);
                        context.SaveChanges();
                        status.user = context.TwitterAccounts.FirstOrDefault(x => x.id == status.user.id);
                    }
                    else
                    {
                        // Reuse the stored account. Leaving the freshly deserialized user on the
                        // tweet would make RawTweets.Add below treat it as a second, new account.
                        status.user = account;
                    }

                    status.AutoSentimentTimestamp   = DateTime.MinValue;
                    status.CorpusSentimentTimestamp = DateTime.MinValue;

                    context.RawTweets.Add(status);
                    if (status.geo != null)
                    {
                        status.geo = context.Locations.Add(status.geo);
                    }

                    context.SaveChanges();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
        /// <summary>
        /// Deletes the tweet with the given id together with every other raw tweet that has
        /// exactly the same text, then shows the next tweet.
        /// </summary>
        /// <param name="id">Id of the raw tweet to delete.</param>
        /// <returns>The "Index" view with a <c>TestModel</c> holding the next tweet.</returns>
        public ActionResult Delete(long id)
        {
            // FirstOrDefault instead of First: a repeated or stale delete request for a tweet
            // that is already gone should not throw.
            Tweet selected = OclumenContext.RawTweets.FirstOrDefault(x => x.id == id);

            if (selected != null)
            {
                string tweetText = selected.text;

                IQueryable <Tweet> foundTweets = OclumenContext.RawTweets.Where(x => x.text == tweetText);

                foreach (Tweet tweet in foundTweets)
                {
                    OclumenContext.RawTweets.Remove(tweet);
                }

                OclumenContext.SaveChanges();
            }

            // fetch the next tweet once and actually use the result
            Tweet nextTweet = GetNextTweet();

            return(View("Index", new TestModel {
                CurrentTweet = nextTweet
            }));
        }
        /// <summary>
        /// Stores a test sentiment rating on the tweet with the given id (if it exists) and
        /// shows the next tweet.
        /// </summary>
        /// <param name="id">Id of the raw tweet being rated.</param>
        /// <param name="sentiment">Sentiment value written to <c>TestSentiment</c>.</param>
        /// <returns>The "Index" view with a <c>TestModel</c> holding the next tweet.</returns>
        public ActionResult RateTweet(long id, int sentiment)
        {
            var ratedAt = DateTime.UtcNow;
            var rated   = OclumenContext.RawTweets.FirstOrDefault(candidate => candidate.id == id);

            // an unknown id is silently ignored; nothing is rated in that case
            if (rated != null)
            {
                rated.TestSentimentTimestamp = ratedAt;
                rated.TestSentiment          = sentiment;
            }

            OclumenContext.SaveChanges();

            var model = new TestModel();
            model.CurrentTweet = GetNextTweet();

            return View("Index", model);
        }
        /// <summary>
        /// Deletes the tweet with the given id together with every other raw tweet that has
        /// exactly the same text, then shows the next tweet.
        /// </summary>
        /// <param name="id">Id of the raw tweet to delete.</param>
        /// <returns>The "Index" view with the model returned by <c>UpdateClassifier</c>.</returns>
        public ActionResult Delete(long id)
        {
            // FirstOrDefault instead of First: one delete removes every tweet with the same
            // text, so a stale page can easily ask for an id that is already gone.
            var selected = OclumenContext.RawTweets.FirstOrDefault(x => x.id == id);

            if (selected != null)
            {
                var tweetText = selected.text;

                var foundTweets = OclumenContext.RawTweets.Where(x => x.text == tweetText);

                foreach (var tweet in foundTweets)
                {
                    OclumenContext.RawTweets.Remove(tweet);
                }

                OclumenContext.SaveChanges();
            }

            var nextTweet = GetNextTweet();

            return(View("Index", UpdateClassifier(new TrainerModel {
                CurrentTweet = nextTweet
            })));
        }
 /// <summary>
 /// Creates the controller and assigns a new <c>OclumenContext</c> instance to the
 /// <c>OclumenContext</c> member.
 /// </summary>
 public BaseController()
 {
     this.OclumenContext = new OclumenContext();
 }
        /// <summary>
        /// Worker loop that drains the shared <c>UnprocessedHashtags</c> and
        /// <c>UnprocessedRetweets</c> queues and stores their entries as use records.
        /// Runs until <c>CrawlerStatus.KeepRunning</c> becomes false.
        /// </summary>
        public static void HashtagRetweetProcessor()
        {
            var context = new OclumenContext();

            var pendingHashtags = new Queue <string>();
            var pendingRetweets = new Queue <string>();

            int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

            while (CrawlerStatus.KeepRunning)
            {
                // move everything from the shared queues into the local ones while holding the lock
                lock (_syncRoot)
                {
                    while (UnprocessedHashtags.Count > 0)
                    {
                        pendingHashtags.Enqueue(UnprocessedHashtags.Dequeue());
                    }

                    while (UnprocessedRetweets.Count > 0)
                    {
                        pendingRetweets.Enqueue(UnprocessedRetweets.Dequeue());
                    }
                }

                // nothing arrived on either queue: wait a little and try again
                bool nothingPending = pendingHashtags.Count == 0 && pendingRetweets.Count == 0;
                if (nothingPending)
                {
                    Thread.Sleep(sleepTime);
                    continue;
                }

                // every record written in this pass shares one timestamp
                DateTime batchTime = DateTime.UtcNow;

                while (pendingHashtags.Count > 0)
                {
                    var hashtagRecord = new HashtagUseRecord();
                    hashtagRecord.Tag       = pendingHashtags.Dequeue();
                    hashtagRecord.Timestamp = batchTime;
                    context.HashtagUseRecords.Add(hashtagRecord);
                }

                while (pendingRetweets.Count > 0)
                {
                    string retweetText = pendingRetweets.Dequeue();

                    // drop a leading ":" and the whitespace around what is left
                    if (retweetText.StartsWith(":", StringComparison.Ordinal))
                    {
                        retweetText = retweetText.Substring(1).Trim();
                    }

                    // drop a leading "RT " marker
                    if (retweetText.StartsWith("RT ", StringComparison.Ordinal))
                    {
                        retweetText = retweetText.Substring(3).Trim();
                    }

                    // the clean-up can leave nothing behind (e.g. the entry was just ":"); skip those
                    if (retweetText.Length > 0)
                    {
                        var retweetRecord = new RetweetUseRecord();
                        retweetRecord.Tag       = retweetText;
                        retweetRecord.Timestamp = batchTime;
                        context.RetweetUseRecords.Add(retweetRecord);
                    }
                }

                context.SaveChanges();
            }
        }
        /// <summary>
        /// Worker loop that loads the unigram and bigram counts (basic and stemmed) into memory
        /// and then classifies raw tweets whose <c>AutoSentimentTimestamp</c> is still
        /// <c>DateTime.MinValue</c>, one tweet per iteration, until
        /// <c>CrawlerStatus.KeepRunning</c> becomes false.
        /// </summary>
        /// <remarks>
        /// The hashtags and retweets found in each tweet are pushed onto the shared
        /// <c>UnprocessedHashtags</c> / <c>UnprocessedRetweets</c> queues under <c>_syncRoot</c>.
        /// </remarks>
        public static void TweetClassifier()
        {
            var context         = new OclumenContext();
            var classifier      = new NaiveBayes();
            var smoothingFactor = 1;

            var unigrams   = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var unigramsSt = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var bigrams    = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var bigramsSt  = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();

            foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 1))
            {
                AddSmoothedNgramCounts(unigrams, ngram.Text, smoothingFactor,
                                       ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount,
                                       ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount);
            }

            foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 1))
            {
                AddSmoothedNgramCounts(unigramsSt, ngram.Text, smoothingFactor,
                                       ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount,
                                       ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount);
            }

            foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 2))
            {
                AddSmoothedNgramCounts(bigrams, ngram.Text, smoothingFactor,
                                       ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount,
                                       ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount);
            }

            foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 2))
            {
                AddSmoothedNgramCounts(bigramsSt, ngram.Text, smoothingFactor,
                                       ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount,
                                       ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount);
            }

            int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

            while (CrawlerStatus.KeepRunning)
            {
                // FirstOrDefault, not First: First throws when nothing matches, so the idle
                // branch below could never run and the worker died once the backlog was empty.
                var tweetToProcess = context.RawTweets.FirstOrDefault(x => x.AutoSentimentTimestamp == DateTime.MinValue);

                if (tweetToProcess == null)
                {
                    Thread.Sleep(sleepTime);
                    continue;
                }

                // add the hashtags to process queue
                String originalTweet;
                var    hashtags = TwitterTextUtility.GetHashtags(tweetToProcess.text);
                var    retweets = TwitterTextUtility.GetRetweets(tweetToProcess.text, out originalTweet);

                lock (_syncRoot)
                {
                    foreach (var hashtag in hashtags)
                    {
                        UnprocessedHashtags.Enqueue(hashtag);
                    }

                    foreach (var retweet in retweets)
                    {
                        UnprocessedRetweets.Enqueue(retweet);
                    }
                }

                // auto classify the tweets
                // NOTE(review): the second argument looks like the n-gram size (1 with the unigram
                // tables, 2 with the bigram tables) and the bool like a "stemmed" switch - confirm
                // against NaiveBayes.GetTextSentiment.
                tweetToProcess.AutoUnigram        = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, false, Dictionary, context.BasicNgrams, context, unigrams);
                tweetToProcess.AutoUnigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, true, Dictionary, context.StemmedNgrams, context, unigramsSt);

                tweetToProcess.AutoBigram        = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, false, Dictionary, context.BasicNgrams, context, bigrams);
                tweetToProcess.AutoBigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, true, Dictionary, context.StemmedNgrams, context, bigramsSt);

                // a non-MinValue timestamp takes the tweet out of the query at the top of the loop
                tweetToProcess.AutoSentimentTimestamp = DateTime.UtcNow;

                context.SaveChanges();
            }
        }

        // Adds two entries for one n-gram to target: the plain counts under the lower-cased text
        // and the Rt* counts under the same key with "_rt" appended, each with smoothingFactor added.
        private static void AddSmoothedNgramCounts(Dictionary <string, List <KeyValuePair <Sentiment, decimal> > > target,
                                                   string text, decimal smoothingFactor,
                                                   decimal positive, decimal neutral, decimal negative,
                                                   decimal rtPositive, decimal rtNeutral, decimal rtNegative)
        {
            string key = text.ToLower();

            target.Add(key, BuildSentimentCounts(positive + smoothingFactor,
                                                 neutral + smoothingFactor,
                                                 negative + smoothingFactor));

            target.Add(key + "_rt", BuildSentimentCounts(rtPositive + smoothingFactor,
                                                         rtNeutral + smoothingFactor,
                                                         rtNegative + smoothingFactor));
        }

        // Builds the three-entry list of (sentiment, value) pairs in the order Positive, Neutral, Negative.
        private static List <KeyValuePair <Sentiment, decimal> > BuildSentimentCounts(decimal positive, decimal neutral, decimal negative)
        {
            return new List <KeyValuePair <Sentiment, decimal> >(3)
            {
                new KeyValuePair <Sentiment, decimal>(Sentiment.Positive, positive),
                new KeyValuePair <Sentiment, decimal>(Sentiment.Neutral, neutral),
                new KeyValuePair <Sentiment, decimal>(Sentiment.Negative, negative)
            };
        }