Inheritance: RepositoryEntity
        /// <summary>
        /// Builds at most MAX_CONTENT article stubs from the supplied tweet groups. A group whose
        /// link already appears in <paramref name="existing"/> reuses the stored stub; for any other
        /// group the HTML link with the highest share count is downloaded and used to extract
        /// candidate images and a summary. Images claimed by articles with different titles are
        /// not used for any of them.
        /// </summary>
        /// <param name="groupingResults">Tweet groups to convert, processed in the order given.</param>
        /// <param name="existing">Previously stored page whose stubs may be reused; may be null.</param>
        /// <returns>One stub per collected content item, in collection order.</returns>
        private async Task<List<ArticleStub>> CreateContent(List<TweetGroup> groupingResults, ArticleStubPage existing)
        {
            var results = new List<ArticleStub>();

            // Holds either reused ArticleStub instances or anonymous "pending" items that still need an image.
            var contentItems = new List<object>();
            foreach (var result in groupingResults)
            {
                if (contentItems.Count >= MAX_CONTENT)
                    break;

                var existingItem = existing != null ?
                    existing.ArticleStubs.FirstOrDefault(x => result.Links.Select(y => y.Uri).Contains(x.Link)) : null;

                if (existingItem != null)
                {
                    contentItems.Add(existingItem);
                    continue;
                }

                // Start with the images already attached to the links themselves.
                List<Uri> imageUris = result.Links.Where(l => l.Image != null).Select(l => l.Image).ToList();

                foreach (var uriex in result.Links.OrderByDescending(x => x.ShareCount))
                {
                    if (!uriex.IsHtmlContentUrl)
                        continue;

                    var doc = new HtmlAgilityPack.HtmlDocument();
                    try
                    {
                        var req = uriex.Uri.GetWebRequest(15000, 15000);
                        using (var resp = await req.GetResponseAsync())
                        using (var reader = new StreamReader(resp.GetResponseStream(), true))
                        {
                            doc.Load(reader);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Downloading is best effort; record the failure instead of hiding it.
                        System.Diagnostics.Trace.TraceWarning("Could not load content from {0}: {1}", uriex.Uri, ex.Message);
                    }

                    // NOTE(review): if DocumentNode is non-null on a document that was never loaded,
                    // a failed download still produces a content item here - confirm against HtmlAgilityPack.
                    if (doc.DocumentNode != null)
                    {
                        imageUris.AddRange(ExtractImageUris(uriex, doc));

                        var content = new
                        {
                            Title = uriex.Title,
                            SubTitle = uriex.Description,
                            Link = uriex.Uri,
                            Summary = ExtractSummary(uriex.Title + " " + uriex.Description, doc),
                            Video = uriex.Video,
                            Images = imageUris
                        };

                        contentItems.Add(content);
                        break;
                    }
                }
            }

            // Pair every candidate image with a key (hash of the title) identifying the article that
            // wants it. A missing title is hashed as the empty string so it cannot throw.
            var imageOwners = new List<KeyValuePair<int, Uri>>();
            foreach (var obj in contentItems)
            {
                var stub = obj as ArticleStub;
                if (stub != null)
                {
                    if (stub.OriginalImageUri != null)
                    {
                        int stubKey = ((object)stub.Title ?? string.Empty).GetHashCode();
                        imageOwners.Add(new KeyValuePair<int, Uri>(stubKey, stub.OriginalImageUri));
                    }
                }
                else
                {
                    dynamic pending = obj;
                    int pendingKey = ((object)pending.Title ?? string.Empty).GetHashCode();
                    foreach (Uri candidate in ((List<Uri>)pending.Images ?? new List<Uri>()))
                        imageOwners.Add(new KeyValuePair<int, Uri>(pendingKey, candidate));
                }
            }

            // An image is excluded when two or more distinct title keys claim it.
            var excludedImages = new HashSet<Uri>(
                imageOwners
                    .GroupBy(owner => owner.Value, owner => owner.Key)
                    .Where(keys => keys.Distinct().Skip(1).Any())
                    .Select(keys => keys.Key));

            foreach (var obj in contentItems)
            {
                var stub = obj as ArticleStub;
                if (stub != null)
                {
                    // A reused stub loses its image when that image is shared with another article.
                    if (excludedImages.Contains(stub.OriginalImageUri))
                        stub.Image = null;

                    results.Add(stub);
                    continue;
                }

                dynamic item = obj;
                var image = await GetBestImage(((List<Uri>)item.Images ?? new List<Uri>()).Where(y => !excludedImages.Contains(y)));
                results.Add(new ArticleStub
                {
                    Title = item.Title,
                    SubTitle = item.SubTitle,
                    Link = item.Link,
                    Image = image != null ? image.Item1 : null,
                    Summary = item.Summary,
                    Video = item.Video,
                    OriginalImageUri = image != null ? image.Item2 : null
                });
            }

            return results;
        }
        /// <summary>
        /// Merges the incoming tweets that pass the user's filters into Tweets, groups the result,
        /// and rebuilds the stored article page from the top ranked groups. The base implementation
        /// runs while the content is being created; the method then blocks until the content task
        /// and its continuation have finished.
        /// </summary>
        /// <param name="tweets">Newly received tweets to consider.</param>
        protected override void StoreInRepository(IEnumerable<Twitter.Tweet> tweets)
        {
            var start = DateTime.Now.AddHours(-48);
            var dayTag = "_" + DateTime.Now.ToShortDateString();

            Func<Tweet, bool> where = t =>
                t != null &&
                    //Should everything be displayed or do you only want content
                (User.OnlyTweetsWithLinks == false || (t.Links != null && t.Links.Count > 0)) &&
                    //Minimum threshold applied so we get results worth seeing (your own tweets get a pass on this step)
                (t.RetweetCount > User.RetweetThreshold ||
                    string.Equals(t.User.ScreenName, User.TwitterScreenName, StringComparison.OrdinalIgnoreCase)) &&
                    //Apply Date Range
                t.CreatedAt >= start;

            Tweets = Tweets.Union(tweets.Where(where)).OrderByDescending(x => x.TweetRank).Take(MAX_CONTENT).ToList();

            // Materialize once: the query is deferred, so each enumeration would regroup the tweets.
            var groups = Tweets
                //Group similar tweets
                .GroupSimilar2()
                //Convert groups into something we can display
                .Select(g => new TweetGroup(g) { RepositoryKey = TwitterModel.Instance(User.TwitterScreenName).CONTENT })
                //Order by TweetRank
                .OrderByDescending(g => g.TweetRank)
                //Only the top content
                .Take(MAX_CONTENT)
                .ToList();

            Task<List<ArticleStub>> contentTask = null;
            Task continueTask = null;

            if (groups.Count > 0)
            {
                contentTask = CreateContent(groups, Page);
                continueTask = contentTask.ContinueWith(task =>
                {
                    // Only replace the stored page when enough articles were produced.
                    if (task.Result.Count >= 25)
                    {
                        var key = TwitterModel.Instance(screenName).CONTENT.ToLower();
                        Page = new ArticleStubPage(1, task.Result.Take(100));

                        repoPage.Delete(key);
                        repoPage.Save(key, Page);

                        // Also keep a copy under the day-specific key.
                        repoPage.Delete(key + dayTag);
                        repoPage.Save(key + dayTag, Page);

                        var articleStubIndex = repoIndex.Query(TwitterModel.Instance(screenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                        var day = DateTime.Now.StartOfDay();
                        var dayKey = day.ToFileTimeUtc();

                        // Register today in the index the first time a page is stored for it.
                        if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == dayKey))
                        {
                            articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(dayKey, day.ToShortDateString()));
                            repoIndex.Save(TwitterModel.Instance(screenName).CONTENT_INDEX, articleStubIndex);
                        }
                    }
                });
            }

            base.StoreInRepository(tweets);

            // Wait for both so a failure in either the content task or its continuation surfaces here.
            if (contentTask != null && continueTask != null)
                Task.WaitAll(contentTask, continueTask);
        }
Example #3
0
        /// <summary>
        /// Console entry point. For every primary user it creates article content and stores it as
        /// an article page, then waits for all of that work to finish.
        /// </summary>
        /// <param name="args">
        /// Optional. When args[0] parses as a date, the page for that day is built and the day is
        /// added to the index if missing. With no arguments, yesterday's page is built (and a tweet
        /// announcing it is prepared) unless yesterday is already indexed, in which case the untagged
        /// page is rebuilt instead.
        /// </param>
        static void Main(string[] args)
        {
            if (!EnsureSingleLoad())
            {
                Console.WriteLine("{0}: Another Instance Currently Running", DateTime.Now);
                return;
            }

            var start = DateTime.Now;
            Console.WriteLine("{0}: Started", start);

            var users = UsersCollection.PrimaryUsers() ?? new List<PostworthyUser>();

            // The list is filled from parallel workers, so every Add must be serialized.
            var tasks = new List<Task>();
            var tasksGate = new object();

            users.AsParallel().ForAll(u =>
            {
                var tweet = "";
                var repoIndex = new SimpleRepository<ArticleStubIndex>(u.TwitterScreenName);
                var repoPage = new SimpleRepository<ArticleStubPage>(u.TwitterScreenName);
                ArticleStubIndex articleStubIndex = null;
                string dayTag = "";
                DateTime day = DateTime.MinValue;
                if (args.Length > 0)
                {
                    if (DateTime.TryParse(args[0], out day))
                    {
                        day = day.StartOfDay();

                        dayTag = "_" + day.ToShortDateString();
                        articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                        if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == day.ToFileTimeUtc()))
                            articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(day.ToFileTimeUtc(), day.ToShortDateString()));
                        else
                            articleStubIndex = null; // already indexed: nothing to save for the index later
                    }
                }
                else
                {
                    articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
                    day = DateTime.Now.AddDays(-1);
                    day = day.StartOfDay();
                    if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == day.ToFileTimeUtc()))
                    {
                        dayTag = "_" + day.ToShortDateString();
                        articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(day.ToFileTimeUtc(), day.ToShortDateString()));
                        var domain = u.PrimaryDomains.OrderBy(x => x.Length).FirstOrDefault();
                        if (!string.IsNullOrEmpty(domain) && !domain.StartsWith("beta", StringComparison.Ordinal))
                            tweet = "Here are the top articles from " + day.ToShortDateString().Replace('/', '-') + " http://" + domain + "/" + day.ToShortDateString().Replace('/', '-');
                    }
                    else
                    {
                        // Yesterday is already indexed: fall back to rebuilding the untagged page.
                        articleStubIndex = null;
                        day = DateTime.MinValue;
                        dayTag = "";
                    }
                }

                var groupingResults = CreateGroups(u, day == DateTime.MinValue ? null : (DateTime?)day);
                var existing = repoPage.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag).FirstOrDefault();
                var contentTask = CreateContent(u, groupingResults, existing);
                Console.WriteLine("{0}: Waiting on content for {1}", DateTime.Now, u.TwitterScreenName);
                var continueTask = contentTask.ContinueWith(task =>
                {
                    Console.WriteLine("{0}: Content completed for {1}", DateTime.Now, u.TwitterScreenName);
                    var stubs = task.Result.Take(MAX_CONTENT);
                    if (stubs.Any() || !string.IsNullOrEmpty(dayTag))
                    {
                        var articleStubPage = new ArticleStubPage(1, stubs);

                        // Carry over only those exclusions that still refer to an article on the new page.
                        if (existing != null && existing.ExcludedArticleStubs.Count > 0)
                        {
                            articleStubPage.ExcludedArticleStubs = existing.ExcludedArticleStubs.Where(e => articleStubPage.ArticleStubs.Contains(e)).ToList();
                        }

                        var pageKey = TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag;

                        Console.WriteLine("{0}: Deleting old data from files from storage for {1}", DateTime.Now, u.TwitterScreenName);
                        repoPage.Delete(pageKey);

                        Console.WriteLine("{0}: Storing data in repository for {1}", DateTime.Now, u.TwitterScreenName);
                        repoPage.Save(pageKey, articleStubPage);

                        if (articleStubIndex != null)
                            repoIndex.Save(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX, articleStubIndex);

                        if (!string.IsNullOrEmpty(tweet))
                        {
                            try
                            {
                                TwitterModel.Instance(u.TwitterScreenName).UpdateStatus(tweet, processStatus: false);
                            }
                            catch (Exception ex) { Console.WriteLine("{0}: Could not tweet message: {1}" + Environment.NewLine + "The following exception was thrown: {2}", DateTime.Now, tweet, ex.ToString()); }
                        }
                    }
                    else
                        Console.WriteLine("{0}: No articles found for {1}", DateTime.Now, u.TwitterScreenName);
                });

                // List<T> is not safe for concurrent writers; unsynchronized Adds could drop tasks.
                lock (tasksGate)
                {
                    tasks.Add(contentTask);
                    tasks.Add(continueTask);
                }
            });

            Task.WaitAll(tasks.ToArray());

            var end = DateTime.Now;
            Console.WriteLine("{0}: Ending and it took {1} minutes to complete", end, (end - start).TotalMinutes);
        }