Beispiel #1
0
 /// <summary>
 /// Returns the cached potential tweets (or potential retweets) as a new array.
 /// The backing list is loaded from <c>tweetRepo</c> on first access and reused afterwards.
 /// </summary>
 /// <param name="retweets">
 /// When <c>true</c> the potential-retweet list is returned; otherwise the potential-tweet list.
 /// </param>
 /// <returns>A fresh array copy of the selected cached list.</returns>
 public Tweet[] GetPotentialTweets(bool retweets = false)
 {
     if (retweets)
     {
         // Lazy load: only query the repository the first time retweets are requested.
         if (potentialReTweets == null)
         {
             var retweetKey = POTENTIAL_RETWEETS + SettingsGuid;
             potentialReTweets = tweetRepo.Query(retweetKey, 0, 0).ToList();
         }

         return potentialReTweets.ToArray();
     }

     // Lazy load: only query the repository the first time tweets are requested.
     if (potentialTweets == null)
     {
         var tweetKey = POTENTIAL_TWEETS + SettingsGuid;
         potentialTweets = tweetRepo.Query(tweetKey, 0, 0).ToList();
     }

     return potentialTweets.ToArray();
 }
Beispiel #2
0
        /// <summary>
        /// Entry point of the storage cleanup tool. Walks every blob container of the
        /// storage account named by the "AzureStorageConnectionString" app setting and,
        /// depending on the arguments, deletes data of other versions, shrinks stored
        /// entity sets and/or removes empty content pages.
        /// </summary>
        /// <param name="args">
        /// Case-insensitive switches:
        /// "content" removes empty article stub pages and their index entries;
        /// "version" deletes index blobs (and their entity blobs) whose name does not contain the current version;
        /// "shrink" or "shrinkN" keeps only the 500 (or N) most recently modified entity blobs per index.
        /// </param>
        /// <exception cref="Exception">Thrown when the connection string setting is missing or empty.</exception>
        static void Main(string[] args)
        {
            var connectionString = ConfigurationManager.AppSettings["AzureStorageConnectionString"];

            if (string.IsNullOrEmpty(connectionString))
            {
                throw new Exception("Config Section 'appSettings' missing AzureStorageConnectionString value!");
            }

            var storageAccount = CloudStorageAccount.Parse(connectionString);
            var blobClient     = storageAccount.CreateCloudBlobClient();

            var deleteTasks = new List<Task>(10000000);

            // Registers a pending delete. The count is captured inside the lock because
            // this delegate is invoked concurrently from the parallel loop below.
            Action<Task> delete = t =>
            {
                int pending;
                lock (deleteTasks)
                {
                    deleteTasks.Add(t);
                    pending = deleteTasks.Count;
                }
                Console.Clear();
                Console.WriteLine("Deleting " + pending + " items");
            };

            var version = TwitterModel.VERSION;

            // Lower-case once so that matching AND parsing agree on casing
            // (previously "Shrink100" matched the switch but then failed in int.Parse).
            var loweredArgs = args.Select(a => a.ToLower()).ToList();

            bool content      = loweredArgs.Any(a => a == "content");
            bool cleanVersion = loweredArgs.Any(a => a == "version");
            bool shrink       = loweredArgs.Any(a => a.StartsWith("shrink"));
            int  newSize      = loweredArgs
                                .Where(a => a.StartsWith("shrink"))
                                .Select(a => a == "shrink" ? 500 : int.Parse(a.Replace("shrink", "")))
                                .FirstOrDefault();

            blobClient.ListContainers()
            //.Skip(1) //Short circuit for testing
            //.Take(1) //Short circuit for testing
            .ToList().AsParallel().ForAll(c =>
            {
                #region Index Cleanup
                var index = c.GetDirectoryReference("Index");
                foreach (var b in index.ListBlobs().OfType<CloudBlockBlob>())
                {
                    if (cleanVersion && !b.Name.Contains(version))
                    {
                        //Delete Index
                        var i = c.GetBlockBlobReference(b.Name);
                        delete(i.DeleteIfExistsAsync());

                        //Delete all Tweets (stored in the directory named after the index blob's last path segment)
                        var d = c.GetDirectoryReference(b.Name.Split('/').Last());
                        foreach (var t in d.ListBlobs().OfType<CloudBlockBlob>())
                        {
                            delete(t.DeleteIfExistsAsync());
                        }
                    }

                    if (shrink && newSize > 0 && b.Name.Contains(version))
                    {
                        //Get Storage Index
                        var i            = c.GetBlockBlobReference(b.Name);
                        var storageIndex = Newtonsoft.Json.JsonConvert.DeserializeObject<StorageEntityIndex>(DownloadBlob(i));

                        // An empty or unreadable index deserializes to null; there is nothing to shrink then.
                        if (storageIndex != null && storageIndex.EntityKeys != null && storageIndex.EntityKeys.Count > newSize)
                        {
                            //Delete extra Tweets: keep the newSize most recently modified blobs
                            var d = c.GetDirectoryReference(b.Name.Split('/').Last());
                            foreach (var t in d.ListBlobs().OfType<CloudBlockBlob>().OrderByDescending(x => x.Properties.LastModified).Skip(newSize))
                            {
                                storageIndex.EntityKeys.Remove(t.Name.Split('/').Last());
                                delete(t.DeleteIfExistsAsync());
                            }

                            //Update Storage Index
                            UploadBlob(i, storageIndex);
                        }
                    }
                }
                #endregion

                #region Content Cleanup
                if (content)
                {
                    try
                    {
                        var repoIndex = new SimpleRepository<ArticleStubIndex>(c.Name);
                        var repoPage  = new SimpleRepository<ArticleStubPage>(c.Name);
                        var stubIndex = repoIndex.Query(TwitterModel.Instance(c.Name).CONTENT_INDEX).FirstOrDefault();
                        if (stubIndex != null)
                        {
                            var pages  = stubIndex.ArticleStubPages;
                            var remove = new List<KeyValuePair<long, string>>();

                            // Only the last month(ish). Clamp the start to 0 so that indexes with
                            // fewer than 31 entries are still inspected (the old "i > -1" guard
                            // skipped them entirely).
                            for (var i = Math.Max(0, pages.Count - 31); i < pages.Count; i++)
                            {
                                var si   = pages[i];
                                var page = repoPage.Query(TwitterModel.Instance(c.Name).CONTENT + "_" + si.Value).FirstOrDefault();

                                // NOTE(review): a missing page is left alone rather than pruned from the
                                // index; previously it raised a swallowed NullReferenceException that
                                // aborted the cleanup before the index was saved.
                                if (page == null)
                                {
                                    continue;
                                }

                                if (page.ArticleStubs == null || page.ArticleStubs.Count == 0)
                                {
                                    repoPage.Delete(TwitterModel.Instance(c.Name).CONTENT + "_" + si.Value);
                                    remove.Add(si);
                                }
                            }
                            remove.ForEach(x => pages.Remove(x));
                            repoIndex.Save(TwitterModel.Instance(c.Name).CONTENT_INDEX, stubIndex);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort per container: report the failure and carry on with the others.
                        Console.Error.WriteLine("Content cleanup failed for " + c.Name + ": " + ex);
                    }
                }
                #endregion
            });

            Console.WriteLine("Waiting on all tasks to complete");
            Task.WaitAll(deleteTasks.ToArray());
        }
Beispiel #3
0
        /// <summary>
        /// Merges the incoming tweets into <c>Tweets</c>, groups the result by similarity,
        /// builds article content from the groups and, when at least 25 article stubs were
        /// produced, saves the page under the content key and the day-tagged key and
        /// registers today in the article stub index. Finally delegates to the base
        /// implementation and waits for the content work to finish.
        /// </summary>
        /// <param name="tweets">Tweets to filter and merge into the stored set.</param>
        protected override void StoreInRepository(IEnumerable<Twitter.Tweet> tweets)
        {
            var start  = DateTime.Now.AddHours(-48);
            var dayTag = "_" + DateTime.Now.ToShortDateString();

            Func<Tweet, bool> where = t =>
                                      t != null &&
                                      //Should everything be displayed or do you only want content
                                      (User.OnlyTweetsWithLinks == false || (t.Links != null && t.Links.Count > 0)) &&
                                      //Minimum threshold applied so we get results worth seeing (if it is your own tweet it gets a pass on this step)
                                      (t.RetweetCount > User.RetweetThreshold ||
                                       string.Equals(t.User.ScreenName, User.TwitterScreenName, StringComparison.OrdinalIgnoreCase)) &&
                                      //Apply Date Range
                                      (t.CreatedAt >= start);

            Tweets = Tweets.Union(tweets.Where(where)).OrderByDescending(x => x.TweetRank).Take(MAX_CONTENT).ToList();

            // Materialize once: the query is deferred, and re-enumerating it would repeat
            // the similarity grouping for every Count()/ToList() call.
            var groups = Tweets
                         //Group similar tweets
                         .GroupSimilar2()
                         //Convert groups into something we can display
                         .Select(g => new TweetGroup(g)
                         {
                             RepositoryKey = TwitterModel.Instance(User.TwitterScreenName).CONTENT
                         })
                         //Order by TweetRank
                         .OrderByDescending(g => g.TweetRank)
                         //Only the top content
                         .Take(MAX_CONTENT)
                         .ToList();

            Task<List<ArticleStub>> contentTask = null;
            Task continueTask = null;

            if (groups.Count > 0)
            {
                // The list is already ordered by TweetRank (descending), so it is passed on as is.
                // The former "filter groups above avg + stdev" step was disabled, so the unused
                // statistics are no longer computed.
                contentTask  = CreateContent(groups, Page);
                continueTask = contentTask.ContinueWith(task =>
                {
                    if (task.Result.Count >= 25)
                    {
                        var key = TwitterModel.Instance(screenName).CONTENT.ToLower();
                        Page    = new ArticleStubPage(1, task.Result.Take(100));

                        repoPage.Delete(key);
                        repoPage.Save(key, Page);

                        repoPage.Delete(key + dayTag);
                        repoPage.Save(key + dayTag, Page);

                        var articleStubIndex = repoIndex.Query(TwitterModel.Instance(screenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                        var day    = DateTime.Now.StartOfDay();
                        var dayKey = day.ToFileTimeUtc();

                        // Register today in the index only once.
                        if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == dayKey))
                        {
                            articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(dayKey, day.ToShortDateString()));
                            repoIndex.Save(TwitterModel.Instance(screenName).CONTENT_INDEX, articleStubIndex);
                        }
                    }
                });
            }

            base.StoreInRepository(tweets);

            // Fixed: the second operand used to re-check contentTask instead of continueTask.
            if (contentTask != null && continueTask != null)
            {
                Task.WaitAll(contentTask, continueTask);
            }
        }
        /// <summary>
        /// Populates this instance from the persisted bot runtime settings and from the
        /// not-yet-executed bot commands stored for <c>User.TwitterScreenName</c>.
        /// </summary>
        private void LoadFromRepository()
        {
            var me = new Tweep(User, Tweep.TweepType.None);
            CachedRepository<TweetBotRuntimeSettings> settingsRepo = CachedRepository<TweetBotRuntimeSettings>.Instance(User.TwitterScreenName);

            SimpleRepository<BotCommand> commandRepo = new SimpleRepository<BotCommand>(User.TwitterScreenName);

            // Fall back to default settings when the repository returns null.
            var runtimeSettings = (settingsRepo.Query(RuntimeRepoKey) ?? new List<TweetBotRuntimeSettings> {
                new TweetBotRuntimeSettings()
            }).FirstOrDefault();

            if (runtimeSettings != null)
            {
                IsSimulationMode = runtimeSettings.IsSimulationMode;
                BotStartupTime   = runtimeSettings.BotFirstStart;
                LastTweetTime    = runtimeSettings.LastTweetTime;
                TweetsSentSinceLastFriendRequest = runtimeSettings.TweetsSentSinceLastFriendRequest;

                // Fetch the past tweets once instead of once per statistic.
                var pastTweets = runtimeSettings.GetPastTweets().ToList();

                // Number of tweets in each (date, hour) bucket; shared by average and max.
                var tweetsPerBucket = pastTweets
                                      .GroupBy(x => x.CreatedAt.ToShortDateString())
                                      .SelectMany(y => y.GroupBy(z => z.CreatedAt.Hour))
                                      .Select(x => x.Count())
                                      .ToList();

                TweetsPerHour       = pastTweets.Count > 1 ? tweetsPerBucket.Average() : 0;
                TweetsPerHourMax    = pastTweets.Count > 2 ? tweetsPerBucket.Max() : 0;
                MinimumRetweetLevel = (int)Math.Ceiling(runtimeSettings.MinimumRetweetLevel);
                CurrentClout        = me.User.FollowersCount;
                FollowerCount       = me.User.FollowersCount;
                FollowingCount      = me.User.FriendsCount;
                TwitterStreamVolume = runtimeSettings.TotalTweetsProcessed / (1.0 * Runtime.TotalMinutes);

                TwitterFollowSuggestions = runtimeSettings.TwitterFollowSuggestions;
                PotentialTweets          = runtimeSettings.GetPotentialTweets().OrderByDescending(t => t.TweetRank).ToList();
                PotentialReTweets        = runtimeSettings.GetPotentialTweets(true).OrderByDescending(t => t.TweetRank).ToList();
                Tweeted = pastTweets.ToList();
                PotentialFriendRequests = runtimeSettings.PotentialFriendRequests
                                          .Select(x => new KeyValuePair<Tweep, int>(x.Key, x.Count)).ToList();
                KeywordSuggestions = runtimeSettings.KeywordSuggestions
                                     .Select(x => new KeyValuePair<string, int>(x.Key, x.Count)).ToList();

                // Tweet counts of the last 30 days, slotted by day of month (Day - 1).
                // NOTE(review): the slot index can reach 30 — confirm TweetsLastThirtyDays has 31 slots.
                var recentByDayOfMonth = pastTweets
                                         .Where(t => t.CreatedAt.AddDays(30) >= DateTime.Now)
                                         .GroupBy(t => t.CreatedAt.Day);
                foreach (var dayGroup in recentByDayOfMonth)
                {
                    TweetsLastThirtyDays[dayGroup.Key - 1] = dayGroup.Count();
                }

                // Resolve the follower ids once; this used to be re-evaluated for every past tweet.
                var followerIds = me.Followers().Select(f => f.ID).ToArray();
                TopFriendTweetCounts = pastTweets
                                       .Where(t => followerIds.Contains(t.User.UserID))
                                       .GroupBy(t => t.User.UserID)
                                       .Select(g => new KeyValuePair<Tweep, int>(new Tweep(g.FirstOrDefault().User, Tweep.TweepType.None), g.Count()))
                                       .ToList();
                SeededKeywords = runtimeSettings.KeywordsToIgnore;
                KeywordsWithOccurrenceCount = runtimeSettings.Keywords
                                              .OrderByDescending(x => x.Count)
                                              .ThenByDescending(x => x.Key)
                                              .Select(x => new KeyValuePair<string, int>(x.Key, x.Count))
                                              .ToList();
                PotentialKeywordsWithOccurrenceCount = runtimeSettings.KeywordSuggestions
                                                       .Select(x => new KeyValuePair<string, int>(x.Key, x.Count)).ToList();
            }

            var commands = commandRepo.Query(CommandRepoKey, where : x => !x.HasBeenExecuted);

            if (commands != null)
            {
                PendingKeywordAdd    = commands.Where(c => c.Command == BotCommand.CommandType.AddKeyword && !c.HasBeenExecuted).Select(c => c.Value).Distinct().ToList();
                PendingKeywordIgnore = commands.Where(c => c.Command == BotCommand.CommandType.IgnoreKeyword && !c.HasBeenExecuted).Select(c => c.Value).Distinct().ToList();
                PendingTweetRemoval  = commands.Where(c => (c.Command == BotCommand.CommandType.RemovePotentialTweet || c.Command == BotCommand.CommandType.RemovePotentialRetweet) && !c.HasBeenExecuted).Select(c => c.Value).Distinct().ToList();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Loads the stored tweets of the user's relevant screen names, keeps those that
        /// pass the link / retweet-threshold / date filters, groups them by similarity and
        /// returns the groups ordered by TweetRank with unusually high-ranked outliers removed.
        /// </summary>
        /// <param name="user">User whose repository and thresholds are used.</param>
        /// <param name="day">
        /// Day to build groups for (start to end of that day); when <c>null</c> the last 48 hours are used.
        /// </param>
        /// <returns>The resulting groups; never <c>null</c>, possibly empty.</returns>
        private static List<TweetGroup> CreateGroups(PostworthyUser user, DateTime? day)
        {
            var repoTweets = new SimpleRepository<Tweet>(user.TwitterScreenName);

            // Treat a missing screen name list as "no sources" instead of failing below.
            List<string> screenNames =
                TwitterModel.Instance(user.TwitterScreenName).GetRelevantScreenNames(user.TwitterScreenName)
                ?? new List<string>();

            int RetweetThreshold = user.RetweetThreshold;

            DateTime start = day == null ? DateTime.Now.AddHours(-48) : day.Value.StartOfDay();
            DateTime end   = day == null ? DateTime.Now : day.Value.EndOfDay();

            Func<Tweet, bool> where = t =>
                                      t != null &&
                                      //Should everything be displayed or do you only want content
                                      (user.OnlyTweetsWithLinks == false || (t.Links != null && t.Links.Count > 0)) &&
                                      //Minimum threshold applied so we get results worth seeing (if it is your own tweet it gets a pass on this step)
                                      (t.RetweetCount > RetweetThreshold ||
                                       string.Equals(t.User.ScreenName, user.TwitterScreenName, StringComparison.OrdinalIgnoreCase)) &&
                                      //Apply Date Range
                                      (t.CreatedAt >= start && t.CreatedAt <= end);

            var startGrouping = DateTime.Now;

            Console.WriteLine("{0}: Starting grouping procedure for {1}", startGrouping, user.TwitterScreenName);

            Console.WriteLine("{0}: Fetching tweets for {1}", startGrouping, user.TwitterScreenName);

            var tweets = screenNames
                         //For each screen name (i.e. - you and your friends if included) select the most recent tweets
                         .SelectMany(x => repoTweets.Query(x + TwitterModel.Instance(user.TwitterScreenName).TWEETS, where : where) ?? new List<Tweet>())
                         //Order all tweets based on rank
                         .OrderByDescending(t => t.TweetRank)
                         //Just to make sure we are not trying to group a very very large number of items
                         .Take(5000)
                         .ToList();

            Console.WriteLine("{0}: Grouping tweets by similarity for {1}", DateTime.Now, user.TwitterScreenName);

            // Materialize once: the query is deferred, and each further enumeration
            // (Count, Average, Sum, ...) would otherwise repeat the similarity grouping.
            var groups = tweets
                         //Group similar tweets
                         .GroupSimilar2()
                         //Convert groups into something we can display
                         .Select(g => new TweetGroup(g)
                         {
                             RepositoryKey = TwitterModel.Instance(user.TwitterScreenName).CONTENT
                         })
                         //Order by TweetRank
                         .OrderByDescending(g => g.TweetRank)
                         //Only the top 500
                         .Take(500)
                         .ToList();

            List<TweetGroup> results = groups;

            if (groups.Count > 0)
            {
                //Get Standard Deviation
                var    ranks = groups.Select(x => x.TweetRank).ToList();
                double avg   = ranks.Average();
                double stdev = Math.Sqrt(ranks.Sum(d => (d - avg) * (d - avg)) / ranks.Count);

                //Filter groups that are way high...
                // With zero deviation (a single group, or all ranks equal) nothing is an
                // outlier; the strict "<" test would otherwise discard every group.
                if (stdev != 0)
                {
                    results = groups.Where(x => x.TweetRank < (avg + stdev)).ToList();
                }
            }

            var endGrouping = DateTime.Now;

            Console.WriteLine("{0}: Grouping procedure for {1} completed and it took {2} minutes to complete", endGrouping, user.TwitterScreenName, (endGrouping - startGrouping).TotalMinutes);

            return results;
        }
Beispiel #6
0
        /// <summary>
        /// Entry point of the storage cleanup tool. Walks every blob container of the
        /// storage account named by the "AzureStorageConnectionString" app setting and,
        /// depending on the arguments, deletes data of other versions, shrinks stored
        /// entity sets and/or removes empty content pages.
        /// </summary>
        /// <param name="args">
        /// Case-insensitive switches:
        /// "content" removes empty article stub pages and their index entries;
        /// "version" deletes index blobs (and their entity blobs) whose name does not contain the current version;
        /// "shrink" or "shrinkN" keeps only the 500 (or N) most recently modified entity blobs per index.
        /// </param>
        /// <exception cref="Exception">Thrown when the connection string setting is missing or empty.</exception>
        static void Main(string[] args)
        {
            var connectionString = ConfigurationManager.AppSettings["AzureStorageConnectionString"];
            if (string.IsNullOrEmpty(connectionString))
            {
                throw new Exception("Config Section 'appSettings' missing AzureStorageConnectionString value!");
            }

            var storageAccount = CloudStorageAccount.Parse(connectionString);
            var blobClient = storageAccount.CreateCloudBlobClient();

            var deleteTasks = new List<Task>(10000000);

            // Registers a pending delete. The count is captured inside the lock because
            // this delegate is invoked concurrently from the parallel loop below.
            Action<Task> delete = t =>
            {
                int pending;
                lock (deleteTasks)
                {
                    deleteTasks.Add(t);
                    pending = deleteTasks.Count;
                }
                Console.Clear();
                Console.WriteLine("Deleting " + pending + " items");
            };

            var version = TwitterModel.VERSION;

            // Lower-case once so that matching AND parsing agree on casing
            // (previously "Shrink100" matched the switch but then failed in int.Parse).
            var loweredArgs = args.Select(a => a.ToLower()).ToList();

            bool content = loweredArgs.Any(a => a == "content");
            bool cleanVersion = loweredArgs.Any(a => a == "version");
            bool shrink = loweredArgs.Any(a => a.StartsWith("shrink"));
            int newSize = loweredArgs
                .Where(a => a.StartsWith("shrink"))
                .Select(a => a == "shrink" ? 500 : int.Parse(a.Replace("shrink", "")))
                .FirstOrDefault();

            blobClient.ListContainers()
                //.Skip(1) //Short circuit for testing
                //.Take(1) //Short circuit for testing
                .ToList().AsParallel().ForAll(c =>
            {
                #region Index Cleanup
                var index = c.GetDirectoryReference("Index");
                foreach (var b in index.ListBlobs().OfType<CloudBlockBlob>())
                {
                    if (cleanVersion && !b.Name.Contains(version))
                    {
                        //Delete Index
                        var i = c.GetBlockBlobReference(b.Name);
                        delete(i.DeleteIfExistsAsync());

                        //Delete all Tweets (stored in the directory named after the index blob's last path segment)
                        var d = c.GetDirectoryReference(b.Name.Split('/').Last());
                        foreach (var t in d.ListBlobs().OfType<CloudBlockBlob>())
                        {
                            delete(t.DeleteIfExistsAsync());
                        }
                    }

                    if (shrink && newSize > 0 && b.Name.Contains(version))
                    {
                        //Get Storage Index
                        var i = c.GetBlockBlobReference(b.Name);
                        var storageIndex = Newtonsoft.Json.JsonConvert.DeserializeObject<StorageEntityIndex>(DownloadBlob(i));

                        // An empty or unreadable index deserializes to null; there is nothing to shrink then.
                        if (storageIndex != null && storageIndex.EntityKeys != null && storageIndex.EntityKeys.Count > newSize)
                        {
                            //Delete extra Tweets: keep the newSize most recently modified blobs
                            var d = c.GetDirectoryReference(b.Name.Split('/').Last());
                            foreach (var t in d.ListBlobs().OfType<CloudBlockBlob>().OrderByDescending(x => x.Properties.LastModified).Skip(newSize))
                            {
                                storageIndex.EntityKeys.Remove(t.Name.Split('/').Last());
                                delete(t.DeleteIfExistsAsync());
                            }

                            //Update Storage Index
                            UploadBlob(i, storageIndex);
                        }
                    }
                }
                #endregion

                #region Content Cleanup
                if (content)
                {
                    try
                    {
                        var repoIndex = new SimpleRepository<ArticleStubIndex>(c.Name);
                        var repoPage = new SimpleRepository<ArticleStubPage>(c.Name);
                        var stubIndex = repoIndex.Query(TwitterModel.Instance(c.Name).CONTENT_INDEX).FirstOrDefault();
                        if (stubIndex != null)
                        {
                            var pages = stubIndex.ArticleStubPages;
                            var remove = new List<KeyValuePair<long, string>>();

                            // Only the last month(ish). Clamp the start to 0 so that indexes with
                            // fewer than 31 entries are still inspected (the old "i > -1" guard
                            // skipped them entirely).
                            for (var i = Math.Max(0, pages.Count - 31); i < pages.Count; i++)
                            {
                                var si = pages[i];
                                var page = repoPage.Query(TwitterModel.Instance(c.Name).CONTENT + "_" + si.Value).FirstOrDefault();

                                // NOTE(review): a missing page is left alone rather than pruned from the
                                // index; previously it raised a swallowed NullReferenceException that
                                // aborted the cleanup before the index was saved.
                                if (page == null)
                                {
                                    continue;
                                }

                                if (page.ArticleStubs == null || page.ArticleStubs.Count == 0)
                                {
                                    repoPage.Delete(TwitterModel.Instance(c.Name).CONTENT + "_" + si.Value);
                                    remove.Add(si);
                                }
                            }
                            remove.ForEach(x => pages.Remove(x));
                            repoIndex.Save(TwitterModel.Instance(c.Name).CONTENT_INDEX, stubIndex);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort per container: report the failure and carry on with the others.
                        Console.Error.WriteLine("Content cleanup failed for " + c.Name + ": " + ex);
                    }
                }
                #endregion
            });

            Console.WriteLine("Waiting on all tasks to complete");
            Task.WaitAll(deleteTasks.ToArray());
        }
Beispiel #7
0
        /// <summary>
        /// Entry point of the content builder. For every primary user, in parallel, it
        /// groups that user's tweets, creates article content from the groups and stores
        /// the resulting page (plus the article stub index when a new day was added).
        /// Exits immediately when <c>EnsureSingleLoad()</c> reports another running instance.
        /// </summary>
        /// <param name="args">
        /// Optional: the first argument is parsed as the date to build content for.
        /// Without arguments yesterday is processed, and an announcement tweet is prepared
        /// when that day was not yet in the index.
        /// </param>
        static void Main(string[] args)
        {
            if (!EnsureSingleLoad())
            {
                Console.WriteLine("{0}: Another Instance Currently Running", DateTime.Now);
                return;
            }

            var start = DateTime.Now;

            Console.WriteLine("{0}: Started", start);

            var users = UsersCollection.PrimaryUsers() ?? new List<PostworthyUser>();

            var tasks = new List<Task>();

            users.AsParallel().ForAll(u =>
            {
                var tweet     = "";
                var repoIndex = new SimpleRepository<ArticleStubIndex>(u.TwitterScreenName);
                var repoPage  = new SimpleRepository<ArticleStubPage>(u.TwitterScreenName);
                ArticleStubIndex articleStubIndex = null;
                string dayTag = "";
                DateTime day  = DateTime.MinValue;
                if (args.Length > 0)
                {
                    if (DateTime.TryParse(args[0], out day))
                    {
                        day = day.StartOfDay();
                        var dayKey = day.ToFileTimeUtc();

                        dayTag           = "_" + day.ToShortDateString();
                        articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                        if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == dayKey))
                        {
                            articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(dayKey, day.ToShortDateString()));
                        }
                        else
                        {
                            // Day already indexed: nothing to save for the index later on.
                            articleStubIndex = null;
                        }
                    }
                }
                else
                {
                    articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                    day = DateTime.Now.AddDays(-1);
                    day = day.StartOfDay();
                    var dayKey = day.ToFileTimeUtc();
                    if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == dayKey))
                    {
                        dayTag = "_" + day.ToShortDateString();
                        articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(dayKey, day.ToShortDateString()));
                        var domain = u.PrimaryDomains.OrderBy(x => x.Length).FirstOrDefault();
                        if (!string.IsNullOrEmpty(domain) && !domain.StartsWith("beta"))
                        {
                            tweet = "Here are the top articles from " + day.ToShortDateString().Replace('/', '-') + " http://" + domain + "/" + day.ToShortDateString().Replace('/', '-');
                        }
                    }
                    else
                    {
                        // Yesterday is already indexed: fall back to the untagged (rolling) content.
                        articleStubIndex = null;
                        day    = DateTime.MinValue;
                        dayTag = "";
                    }
                }

                var groupingResults = CreateGroups(u, day == DateTime.MinValue ? null : (DateTime?)day);
                var existing        = repoPage.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag).FirstOrDefault();
                var contentTask     = CreateContent(u, groupingResults, existing);
                Console.WriteLine("{0}: Waiting on content for {1}", DateTime.Now, u.TwitterScreenName);
                var continueTask = contentTask.ContinueWith(task =>
                {
                    Console.WriteLine("{0}: Content completed for {1}", DateTime.Now, u.TwitterScreenName);
                    var stubs = task.Result.Take(MAX_CONTENT);
                    if (stubs.Any() || !string.IsNullOrEmpty(dayTag))
                    {
                        var articleStubPage = new ArticleStubPage(1, stubs);

                        // Carry over exclusions that still refer to stubs on the new page.
                        if (existing != null && existing.ExcludedArticleStubs != null && existing.ExcludedArticleStubs.Count > 0)
                        {
                            articleStubPage.ExcludedArticleStubs = existing.ExcludedArticleStubs.Where(e => articleStubPage.ArticleStubs.Contains(e)).ToList();
                        }

                        Console.WriteLine("{0}: Deleting old data from files from storage for {1}", DateTime.Now, u.TwitterScreenName);
                        repoPage.Delete(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag);

                        Console.WriteLine("{0}: Storing data in repository for {1}", DateTime.Now, u.TwitterScreenName);
                        repoPage.Save(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag, articleStubPage);

                        if (articleStubIndex != null)
                        {
                            repoIndex.Save(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX, articleStubIndex);
                        }

                        if (!string.IsNullOrEmpty(tweet))
                        {
                            try
                            {
                                TwitterModel.Instance(u.TwitterScreenName).UpdateStatus(tweet, processStatus: false);
                            }
                            catch (Exception ex) { Console.WriteLine("{0}: Could not tweet message: {1}" + Environment.NewLine + "The following exception was thrown: {2}", DateTime.Now, tweet, ex.ToString()); }
                        }
                    }
                    else
                    {
                        Console.WriteLine("{0}: No articles found for {1}", DateTime.Now, u.TwitterScreenName);
                    }
                });

                // List<T> is not safe for concurrent writers and this lambda runs in parallel.
                lock (tasks)
                {
                    tasks.Add(contentTask);
                    tasks.Add(continueTask);
                }
            });

            Task.WaitAll(tasks.ToArray());

            var end = DateTime.Now;

            Console.WriteLine("{0}: Ending and it took {1} minutes to complete", end, (end - start).TotalMinutes);
        }
        /// <summary>
        /// Loads the persisted bot runtime settings and the not-yet-executed bot commands for
        /// <c>User</c> and copies them into this instance's properties.
        /// </summary>
        /// <remarks>
        /// The past-tweet history is read from the settings once and reused for every statistic,
        /// and the follower ID list is built once instead of once per past tweet.
        /// </remarks>
        private void LoadFromRepository()
        {
            var me = new Tweep(User, Tweep.TweepType.None);
            CachedRepository<TweetBotRuntimeSettings> settingsRepo = CachedRepository<TweetBotRuntimeSettings>.Instance(User.TwitterScreenName);
            SimpleRepository<BotCommand> commandRepo = new SimpleRepository<BotCommand>(User.TwitterScreenName);

            //var runtimeSettings = Newtonsoft.Json.JsonConvert.DeserializeObject<TweetBotRuntimeSettings>(System.IO.File.OpenText("c:\\temp\\runtimesettings.demo.json.txt").ReadToEnd());

            // A null query result falls back to a single default settings object; an empty
            // (non-null) result yields null here and skips the settings section entirely.
            var runtimeSettings = (settingsRepo.Query(RuntimeRepoKey) ?? new List<TweetBotRuntimeSettings> { new TweetBotRuntimeSettings() }).FirstOrDefault();

            if (runtimeSettings != null)
            {
                // Snapshot the tweet history once; every statistic below reads from this copy.
                var pastTweets = runtimeSettings.GetPastTweets().ToList();

                // Number of tweets sent in each distinct (calendar day, hour) bucket.
                var tweetsPerHourCounts = pastTweets
                    .GroupBy(x => x.CreatedAt.ToShortDateString())
                    .SelectMany(y => y.GroupBy(z => z.CreatedAt.Hour))
                    .Select(x => x.Count())
                    .ToList();

                IsSimulationMode = runtimeSettings.IsSimulationMode;
                BotStartupTime = runtimeSettings.BotFirstStart;
                LastTweetTime = runtimeSettings.LastTweetTime;
                TweetsSentSinceLastFriendRequest = runtimeSettings.TweetsSentSinceLastFriendRequest;
                // The thresholds (more than 1 / more than 2 past tweets) are kept as in the original.
                TweetsPerHour = pastTweets.Count > 1 ? tweetsPerHourCounts.Average() : 0;
                TweetsPerHourMax = pastTweets.Count > 2 ? tweetsPerHourCounts.Max() : 0;
                MinimumRetweetLevel = (int)Math.Ceiling(runtimeSettings.MinimumRetweetLevel);
                CurrentClout = me.User.FollowersCount;
                FollowerCount = me.User.FollowersCount;
                FollowingCount = me.User.FriendsCount;
                TwitterStreamVolume = runtimeSettings.TotalTweetsProcessed / (1.0 * Runtime.TotalMinutes);

                TwitterFollowSuggestions = runtimeSettings.TwitterFollowSuggestions;
                PotentialTweets = runtimeSettings.GetPotentialTweets().OrderByDescending(t => t.TweetRank).ToList();
                PotentialReTweets = runtimeSettings.GetPotentialTweets(true).OrderByDescending(t => t.TweetRank).ToList();
                Tweeted = pastTweets.ToList();
                PotentialFriendRequests = runtimeSettings.PotentialFriendRequests
                    .Select(x => new KeyValuePair<Tweep, int>(x.Key, x.Count)).ToList();
                KeywordSuggestions = runtimeSettings.KeywordSuggestions
                    .Select(x => new KeyValuePair<string, int>(x.Key, x.Count)).ToList();

                // NOTE(review): slots are addressed by day-of-month (1..31 mapped to 0..30), so tweets
                // from different months that share a day number are counted together - confirm this is
                // intended and that TweetsLastThirtyDays can hold index 30.
                var recentTweetsByDayOfMonth = pastTweets
                    .Where(t => t.CreatedAt.AddDays(30) >= DateTime.Now)
                    .GroupBy(t => t.CreatedAt.Day);
                foreach (var dayGroup in recentTweetsByDayOfMonth)
                {
                    TweetsLastThirtyDays[dayGroup.Key - 1] = dayGroup.Count();
                }

                // Resolve the follower IDs once (and only when there is something to filter) rather
                // than calling me.Followers() again for every past tweet.
                var followerIds = pastTweets.Count > 0
                    ? me.Followers().Select(f => f.ID).ToArray()
                    : null;
                TopFriendTweetCounts = pastTweets
                    .Where(t => followerIds.Contains(t.User.UserID))
                    .GroupBy(t => t.User.UserID)
                    .Select(g => new KeyValuePair<Tweep, int>(new Tweep(g.FirstOrDefault().User, Tweep.TweepType.None), g.Count()))
                    .ToList();

                SeededKeywords = runtimeSettings.KeywordsToIgnore;
                KeywordsWithOccurrenceCount = runtimeSettings.Keywords
                    //.Concat(runtimeSettings.KeywordSuggestions.Where(x => x.Count >= TweetBotProcessingStep.MINIMUM_KEYWORD_COUNT))
                    .OrderByDescending(x => x.Count)
                    .ThenByDescending(x => x.Key)
                    .Select(x => new KeyValuePair<string, int>(x.Key, x.Count))
                    .ToList();
                PotentialKeywordsWithOccurrenceCount = runtimeSettings.KeywordSuggestions
                    //.Where(x => x.Count < TweetBotProcessingStep.MINIMUM_KEYWORD_COUNT)
                    .Select(x => new KeyValuePair<string, int>(x.Key, x.Count)).ToList();
            }

            var commands = commandRepo.Query(CommandRepoKey, where: x => !x.HasBeenExecuted);

            if (commands != null)
            {
                // Distinct values of the pending commands, split by the kind of change they request.
                PendingKeywordAdd = commands.Where(c => c.Command == BotCommand.CommandType.AddKeyword && !c.HasBeenExecuted).Select(c => c.Value).Distinct().ToList();
                PendingKeywordIgnore = commands.Where(c => c.Command == BotCommand.CommandType.IgnoreKeyword && !c.HasBeenExecuted).Select(c => c.Value).Distinct().ToList();
                PendingTweetRemoval = commands.Where(c => (c.Command == BotCommand.CommandType.RemovePotentialTweet || c.Command == BotCommand.CommandType.RemovePotentialRetweet) && !c.HasBeenExecuted).Select(c => c.Value).Distinct().ToList();
            }
        }
Beispiel #9
0
        /// <summary>
        /// Applies every bot command that has not been executed yet to <c>RuntimeSettings</c>,
        /// flags each command as executed, and then saves the commands and the runtime settings.
        /// </summary>
        /// <remarks>
        /// Nothing is saved when the command query returns null. Commands that refer to a tweep or
        /// tweet that can no longer be found are still flagged as executed, but change nothing.
        /// </remarks>
        private void ExecutePendingCommands()
        {
            var unexecutedCommands = commandRepo.Query(CommandRepoKey, where: x => !x.HasBeenExecuted);

            if (unexecutedCommands == null)
            {
                return;
            }

            foreach (var command in unexecutedCommands)
            {
                switch (command.Command)
                {
                    case BotCommand.CommandType.Refresh:
                        //We dont have to do anything since the data is saved to repo below...
                        break;

                    case BotCommand.CommandType.AddKeyword:
                        if (!RuntimeSettings.KeywordsManuallyAdded.Contains(command.Value))
                        {
                            RuntimeSettings.KeywordsManuallyAdded.Add(command.Value);
                            RuntimeSettings.Keywords.Add(new CountableItem(command.Value, 0));
                            // A manually added keyword must no longer be on the ignore list.
                            RuntimeSettings.KeywordsManuallyIgnored.Remove(command.Value);
                            hasNewKeywordSuggestions = true;
                        }
                        break;

                    case BotCommand.CommandType.IgnoreKeyword:
                        if (!RuntimeSettings.KeywordsManuallyIgnored.Contains(command.Value))
                        {
                            RuntimeSettings.KeywordsManuallyIgnored.Add(command.Value);

                            RuntimeSettings.Keywords.Remove(RuntimeSettings.Keywords.FirstOrDefault(x => x.Key == command.Value));

                            // Only flag new suggestions when the keyword had been added manually before.
                            if (RuntimeSettings.KeywordsManuallyAdded.Remove(command.Value))
                            {
                                hasNewKeywordSuggestions = true;
                            }
                        }
                        break;

                    case BotCommand.CommandType.IgnoreTweep:
                        var tweepIgnore = RuntimeSettings.PotentialFriendRequests.FirstOrDefault(x => x.Key.UniqueKey == command.Value);
                        if (tweepIgnore != null)
                        {
                            tweepIgnore.Key.Type = Tweep.TweepType.IgnoreAlways;
                        }
                        break;

                    case BotCommand.CommandType.TargetTweep:
                        var tweepTarget = RuntimeSettings.PotentialFriendRequests.FirstOrDefault(x => x.Key.UniqueKey == command.Value);
                        if (tweepTarget != null)
                        {
                            tweepTarget.Key.Type = Tweep.TweepType.Target;
                        }
                        break;

                    case BotCommand.CommandType.RemovePotentialRetweet:
                        var retweet = RuntimeSettings.GetPotentialTweets(true).FirstOrDefault(x => x.UniqueKey == command.Value);
                        // The retweet may already be gone; skip the removal instead of passing null on.
                        if (retweet != null)
                        {
                            RuntimeSettings.RemovePotentialTweet(retweet, true);
                        }
                        break;

                    case BotCommand.CommandType.RemovePotentialTweet:
                        var tweet = RuntimeSettings.GetPotentialTweets().FirstOrDefault(x => x.UniqueKey == command.Value);
                        // Same guard as for retweets: nothing to remove when the tweet is not found.
                        if (tweet != null)
                        {
                            RuntimeSettings.RemovePotentialTweet(tweet);
                        }
                        break;
                }

                command.HasBeenExecuted = true;
            }

            commandRepo.Save(CommandRepoKey, unexecutedCommands);
            settingsRepo.Save(RuntimeRepoKey, RuntimeSettings);
        }
Beispiel #10
0
        /// <summary>
        /// Loads the stored tweets of the user's relevant screen names, filters them, groups similar
        /// tweets together and returns the groups with unusually high ranks removed.
        /// </summary>
        /// <param name="user">User whose tweets, retweet threshold and link preference are used.</param>
        /// <param name="day">
        /// Day to build groups for (start of day to end of day); when null the last 48 hours are used.
        /// </param>
        /// <returns>
        /// At most 500 groups whose rank is below average plus one standard deviation, or all groups
        /// when every rank is identical; never null.
        /// </returns>
        private static List<TweetGroup> CreateGroups(PostworthyUser user, DateTime? day)
        {
            var repoTweets = new SimpleRepository<Tweet>(user.TwitterScreenName);
            var model = TwitterModel.Instance(user.TwitterScreenName);
            var ownScreenName = user.TwitterScreenName;

            List<string> screenNames = model.GetRelevantScreenNames(user.TwitterScreenName);

            int retweetThreshold = user.RetweetThreshold;

            DateTime start = day == null ? DateTime.Now.AddHours(-48) : day.Value.StartOfDay();
            DateTime end = day == null ? DateTime.Now : day.Value.EndOfDay();

            Func<Tweet, bool> where = t =>
                t != null &&
                //Should everything be displayed or do you only want content
                (user.OnlyTweetsWithLinks == false || (t.Links != null && t.Links.Count > 0)) &&
                //Minimum threshold applied so we get results worth seeing (if it is your own tweet it gets a pass on this step)
                //Screen names are compared ordinally so the result does not depend on the current culture
                (t.RetweetCount > retweetThreshold || string.Equals(t.User.ScreenName, ownScreenName, StringComparison.OrdinalIgnoreCase)) &&
                //Apply Date Range
                (t.CreatedAt >= start && t.CreatedAt <= end);

            var startGrouping = DateTime.Now;

            Console.WriteLine("{0}: Starting grouping procedure for {1}", startGrouping, user.TwitterScreenName);

            Console.WriteLine("{0}: Fetching tweets for {1}", startGrouping, user.TwitterScreenName);

            var tweets = screenNames
                //For each screen name (i.e. - you and your friends if included) select the most recent tweets
                .SelectMany(x => repoTweets.Query(x + model.TWEETS, where: where) ?? new List<Tweet>())
                //Order all tweets based on rank (TweetRank takes into account many important factors, i.e. - time, mentions, hotness, etc.)
                .OrderByDescending(t => t.TweetRank)
                //Just to make sure we are not trying to group a very very large number of items
                .Take(5000)
                .ToList();

            Console.WriteLine("{0}: Grouping tweets by similarity for {1}", DateTime.Now, user.TwitterScreenName);

            var groups = tweets
                //Group similar tweets
                .GroupSimilar2()
                //Convert groups into something we can display
                .Select(g => new TweetGroup(g) { RepositoryKey = model.CONTENT })
                //Order by TweetRank
                .OrderByDescending(g => g.TweetRank)
                //Only the top 500
                .Take(500)
                //Materialize once: the statistics below would otherwise re-run the whole grouping per pass
                .ToList();

            var results = groups;

            if (groups.Count > 0)
            {
                //Get Standard Deviation
                var ranks = groups.Select(x => x.TweetRank).ToList();
                double avg = ranks.Average();
                double stdev = Math.Sqrt(ranks.Sum(d => (d - avg) * (d - avg)) / ranks.Count);

                //Filter groups that are way high...
                //With a zero deviation (e.g. a single group) nothing is an outlier; filtering then
                //would discard every group because no rank is strictly below the average.
                if (stdev > 0)
                {
                    results = groups.Where(x => x.TweetRank < (avg + stdev)).ToList();
                }
            }

            var endGrouping = DateTime.Now;
            Console.WriteLine("{0}: Grouping procedure for {1} completed and it took {2} minutes to complete", endGrouping, user.TwitterScreenName, (endGrouping - startGrouping).TotalMinutes);

            return results;
        }
Beispiel #11
0
        /// <summary>
        /// Builds and stores the article page for every primary user, in parallel, and waits for
        /// all content tasks to finish.
        /// </summary>
        /// <param name="args">
        /// Optional. When the first argument parses as a date, the page for that day is built.
        /// With no arguments, yesterday's page is built if the index does not list it yet;
        /// otherwise the untagged page is refreshed from the last 48 hours of tweets.
        /// </param>
        static void Main(string[] args)
        {
            if (!EnsureSingleLoad())
            {
                Console.WriteLine("{0}: Another Instance Currently Running", DateTime.Now);
                return;
            }

            var start = DateTime.Now;
            Console.WriteLine("{0}: Started", start);

            var users = UsersCollection.PrimaryUsers() ?? new List<PostworthyUser>();

            var tasks = new List<Task>();

            users.AsParallel().ForAll(u =>
            {
                var tweet = "";
                var repoIndex = new SimpleRepository<ArticleStubIndex>(u.TwitterScreenName);
                var repoPage = new SimpleRepository<ArticleStubPage>(u.TwitterScreenName);
                ArticleStubIndex articleStubIndex = null;
                string dayTag = "";
                DateTime day = DateTime.MinValue;
                if (args.Length > 0)
                {
                    if (DateTime.TryParse(args[0], out day))
                    {
                        day = day.StartOfDay();

                        dayTag = "_" + day.ToShortDateString();
                        articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                        if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == day.ToFileTimeUtc()))
                        {
                            articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(day.ToFileTimeUtc(), day.ToShortDateString()));
                        }
                        else
                        {
                            // Day already indexed: rebuild its page but leave the index untouched.
                            articleStubIndex = null;
                        }
                    }
                }
                else
                {
                    articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                    day = DateTime.Now.AddDays(-1);
                    day = day.StartOfDay();
                    if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == day.ToFileTimeUtc()))
                    {
                        dayTag = "_" + day.ToShortDateString();
                        articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(day.ToFileTimeUtc(), day.ToShortDateString()));
                        var domain = u.PrimaryDomains.OrderBy(x => x.Length).FirstOrDefault();
                        if (!string.IsNullOrEmpty(domain) && !domain.StartsWith("beta"))
                        {
                            tweet = "Here are the top articles from " + day.ToShortDateString().Replace('/', '-') + " http://" + domain + "/" + day.ToShortDateString().Replace('/', '-');
                        }
                    }
                    else
                    {
                        // Yesterday is already indexed: fall back to the untagged page.
                        articleStubIndex = null;
                        day = DateTime.MinValue;
                        dayTag = "";
                    }
                }

                // Repository key of the page being rebuilt; dayTag is final from here on.
                var pageKey = TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag;

                var groupingResults = CreateGroups(u, day == DateTime.MinValue ? null : (DateTime?)day);
                var existing = repoPage.Query(pageKey).FirstOrDefault();
                var contentTask = CreateContent(u, groupingResults, existing);
                Console.WriteLine("{0}: Waiting on content for {1}", DateTime.Now, u.TwitterScreenName);
                var continueTask = contentTask.ContinueWith(task =>
                {
                    Console.WriteLine("{0}: Content completed for {1}", DateTime.Now, u.TwitterScreenName);
                    var stubs = task.Result.Take(MAX_CONTENT);
                    if (stubs.Any() || !string.IsNullOrEmpty(dayTag))
                    {
                        var articleStubPage = new ArticleStubPage(1, stubs);

                        // Carry over only those exclusions that still refer to an article on the new page.
                        if (existing != null && existing.ExcludedArticleStubs.Count > 0)
                        {
                            articleStubPage.ExcludedArticleStubs = existing.ExcludedArticleStubs.Where(e => articleStubPage.ArticleStubs.Contains(e)).ToList();
                        }

                        Console.WriteLine("{0}: Deleting old data from files from storage for {1}", DateTime.Now, u.TwitterScreenName);
                        repoPage.Delete(pageKey);

                        Console.WriteLine("{0}: Storing data in repository for {1}", DateTime.Now, u.TwitterScreenName);
                        repoPage.Save(pageKey, articleStubPage);

                        if (articleStubIndex != null)
                        {
                            repoIndex.Save(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX, articleStubIndex);
                        }

                        if (!string.IsNullOrEmpty(tweet))
                        {
                            // Announcing the page is best effort; a failure is logged and does not fail the run.
                            try
                            {
                                TwitterModel.Instance(u.TwitterScreenName).UpdateStatus(tweet, processStatus: false);
                            }
                            catch (Exception ex)
                            {
                                Console.WriteLine("{0}: Could not tweet message: {1}" + Environment.NewLine + "The following exception was thrown: {2}", DateTime.Now, tweet, ex.ToString());
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine("{0}: No articles found for {1}", DateTime.Now, u.TwitterScreenName);
                    }
                });

                // ForAll runs this body on several threads at once and List<T> is not safe for
                // concurrent writes, so registration of the tasks must be serialized.
                lock (tasks)
                {
                    tasks.Add(contentTask);
                    tasks.Add(continueTask);
                }
            });

            Task.WaitAll(tasks.ToArray());

            var end = DateTime.Now;
            Console.WriteLine("{0}: Ending and it took {1} minutes to complete", end, (end - start).TotalMinutes);
        }