/// <summary>
/// Builds article stubs for the given tweet groups.
/// </summary>
/// <remarks>
/// For each group (up to <c>MAX_CONTENT</c> entries) a stub from <paramref name="existing"/> is reused
/// when its <c>Link</c> matches one of the group's link URIs. Otherwise the group's links are tried in
/// descending <c>ShareCount</c> order and the first link flagged <c>IsHtmlContentUrl</c> is downloaded
/// and turned into a new stub. Groups without such a link produce no stub.
/// An image URI that is claimed by stubs with different title hashes is treated as non-specific
/// (excluded): it is not offered to <c>GetBestImage</c> for new stubs, and a reused stub whose
/// <c>OriginalImageUri</c> is excluded has its <c>Image</c> cleared.
/// </remarks>
/// <param name="groupingResults">Tweet groups to convert, processed in the order given.</param>
/// <param name="existing">Previously stored page whose stubs may be reused; may be null.</param>
/// <returns>The stubs in the order their groups were processed.</returns>
private async Task<List<ArticleStub>> CreateContent(List<TweetGroup> groupingResults, ArticleStubPage existing)
{
    // Item1 is the stub. Item2 holds the candidate image URIs of a newly built stub
    // and is null for a stub that was reused from the existing page.
    var entries = new List<Tuple<ArticleStub, List<Uri>>>();

    foreach (var result in groupingResults)
    {
        if (entries.Count >= MAX_CONTENT)
        {
            break;
        }

        if (existing != null)
        {
            var groupUris = result.Links.Select(y => y.Uri).ToList();
            var existingItem = existing.ArticleStubs.FirstOrDefault(x => groupUris.Contains(x.Link));
            if (existingItem != null)
            {
                entries.Add(new Tuple<ArticleStub, List<Uri>>(existingItem, null));
                continue;
            }
        }

        // Images already attached to the group's links; page images are appended below.
        List<Uri> imageUris = result.Links
            .Where(l => l.Image != null)
            .Select(l => l.Image)
            .ToList();

        foreach (var uriex in result.Links.OrderByDescending(x => x.ShareCount))
        {
            if (!uriex.IsHtmlContentUrl)
            {
                continue;
            }

            var doc = new HtmlAgilityPack.HtmlDocument();
            try
            {
                var req = uriex.Uri.GetWebRequest(15000, 15000);
                using (var resp = await req.GetResponseAsync())
                using (var reader = new StreamReader(resp.GetResponseStream(), true))
                {
                    doc.Load(reader);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a failed download must not abort the whole run. The stub is
                // still built from the link metadata and whatever the document contains.
                System.Diagnostics.Trace.TraceWarning("CreateContent: could not load {0}: {1}", uriex.Uri, ex.Message);
            }

            if (doc.DocumentNode == null)
            {
                continue;
            }

            imageUris.AddRange(ExtractImageUris(uriex, doc));

            var stub = new ArticleStub
            {
                Title = uriex.Title,
                SubTitle = uriex.Description,
                Link = uriex.Uri,
                Summary = ExtractSummary(uriex.Title + " " + uriex.Description, doc),
                Video = uriex.Video
            };
            entries.Add(new Tuple<ArticleStub, List<Uri>>(stub, imageUris));

            // Only the first HTML link of a group is turned into content.
            break;
        }
    }

    // Pair every image with the hash of the title of the stub that claims it.
    var imageOwners = new List<KeyValuePair<Uri, int>>();
    foreach (var entry in entries)
    {
        object title = entry.Item1.Title;
        int ownerId = title != null ? title.GetHashCode() : 0; // a null title must not throw

        if (entry.Item2 != null)
        {
            foreach (var uri in entry.Item2)
            {
                imageOwners.Add(new KeyValuePair<Uri, int>(uri, ownerId));
            }
        }
        else if (entry.Item1.OriginalImageUri != null)
        {
            imageOwners.Add(new KeyValuePair<Uri, int>(entry.Item1.OriginalImageUri, ownerId));
        }
    }

    // An image claimed under two or more different title hashes is excluded everywhere.
    var excludedImages = new HashSet<Uri>(
        imageOwners
            .GroupBy(o => o.Key, o => o.Value)
            .Where(g => g.Distinct().Skip(1).Any())
            .Select(g => g.Key));

    var results = new List<ArticleStub>();
    foreach (var entry in entries)
    {
        var stub = entry.Item1;
        var candidates = entry.Item2;

        if (candidates != null)
        {
            var image = await GetBestImage(candidates.Where(y => !excludedImages.Contains(y)));
            stub.Image = image != null ? image.Item1 : null;
            stub.OriginalImageUri = image != null ? image.Item2 : null;
        }
        else if (excludedImages.Contains(stub.OriginalImageUri))
        {
            // The reused stub's image turned out to be shared with another article.
            stub.Image = null;
        }

        results.Add(stub);
    }

    return results;
}
/// <summary>
/// Merges the incoming tweets into <c>Tweets</c>, groups them, builds article content for the
/// groups and persists the resulting page and day index before returning.
/// </summary>
/// <remarks>
/// Content creation is started before <c>base.StoreInRepository</c> is called and is waited on
/// afterwards, so the method blocks until both the content task and its continuation have finished.
/// The page is only persisted when at least 25 stubs were produced; at most 100 stubs are stored.
/// </remarks>
/// <param name="tweets">Newly retrieved tweets to filter and merge.</param>
protected override void StoreInRepository(IEnumerable<Twitter.Tweet> tweets)
{
    const int minStubsToPersist = 25;
    const int maxStubsPerPage = 100;

    var now = DateTime.Now;
    var start = now.AddHours(-48);
    var dayTag = "_" + now.ToShortDateString();

    Func<Tweet, bool> where = t =>
        t != null &&
        // Should everything be displayed or do you only want content
        (User.OnlyTweetsWithLinks == false || (t.Links != null && t.Links.Count > 0)) &&
        // Minimum threshold applied so we get results worth seeing
        // (if it is your own tweet it gets a pass on this step)
        (t.RetweetCount > User.RetweetThreshold ||
            string.Equals(t.User.ScreenName, User.TwitterScreenName, StringComparison.OrdinalIgnoreCase)) &&
        // Apply date range
        t.CreatedAt >= start;

    Tweets = Tweets
        .Union(tweets.Where(where))
        .OrderByDescending(x => x.TweetRank)
        .Take(MAX_CONTENT)
        .ToList();

    // Materialized once: the query would otherwise regroup the tweets and
    // build new TweetGroup instances on every enumeration.
    var groups = Tweets
        // Group similar tweets
        .GroupSimilar2()
        // Convert groups into something we can display
        .Select(g => new TweetGroup(g) { RepositoryKey = TwitterModel.Instance(User.TwitterScreenName).CONTENT })
        // Highest ranked first
        .OrderByDescending(g => g.TweetRank)
        // Only the top content
        .Take(MAX_CONTENT)
        .ToList();

    Task<List<ArticleStub>> contentTask = null;
    Task continueTask = null;

    if (groups.Count > 0)
    {
        contentTask = CreateContent(groups, Page);
        continueTask = contentTask.ContinueWith(task =>
        {
            if (task.Result.Count < minStubsToPersist)
            {
                return;
            }

            var key = TwitterModel.Instance(screenName).CONTENT.ToLower();
            Page = new ArticleStubPage(1, task.Result.Take(maxStubsPerPage));

            // Stored twice: under the plain key and under the key tagged with today's date.
            repoPage.Delete(key);
            repoPage.Save(key, Page);
            repoPage.Delete(key + dayTag);
            repoPage.Save(key + dayTag, Page);

            var indexKey = TwitterModel.Instance(screenName).CONTENT_INDEX;
            var articleStubIndex = repoIndex.Query(indexKey).FirstOrDefault() ?? new ArticleStubIndex();
            var day = DateTime.Now.StartOfDay();
            var dayKey = day.ToFileTimeUtc();

            // Register the day in the index only the first time a page is stored for it.
            if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == dayKey))
            {
                articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(dayKey, day.ToShortDateString()));
                repoIndex.Save(indexKey, articleStubIndex);
            }
        });
    }

    base.StoreInRepository(tweets);

    if (contentTask != null && continueTask != null)
    {
        Task.WaitAll(contentTask, continueTask);
    }
}
/// <summary>
/// Entry point: builds and stores the article page for every primary user, then waits for all
/// content tasks to finish.
/// </summary>
/// <remarks>
/// Exits immediately when <c>EnsureSingleLoad</c> reports that another instance is running.
/// When <paramref name="args"/> has a first element that <c>DateTime.TryParse</c> accepts, content is
/// built for that day and stored under a day-tagged key; if parsing fails, no day is used and
/// the untagged key is written. Without arguments the previous day is used, but only if it is not
/// already listed in the user's index (otherwise the untagged key is written); in that case a status
/// tweet is also prepared when the user's shortest primary domain is non-empty and does not start with "beta".
/// </remarks>
/// <param name="args">Optional; the first element may be a date to build content for.</param>
static void Main(string[] args)
{
    if (!EnsureSingleLoad())
    {
        Console.WriteLine("{0}: Another Instance Currently Running", DateTime.Now);
        return;
    }

    var start = DateTime.Now;
    Console.WriteLine("{0}: Started", start);

    var users = UsersCollection.PrimaryUsers() ?? new List<PostworthyUser>();
    var tasks = new List<Task>();
    // List<T> is not safe for concurrent writers and ForAll runs the body on several threads.
    var tasksGate = new object();

    users.AsParallel().ForAll(u =>
    {
        var tweet = "";
        var repoIndex = new SimpleRepository<ArticleStubIndex>(u.TwitterScreenName);
        var repoPage = new SimpleRepository<ArticleStubPage>(u.TwitterScreenName);
        ArticleStubIndex articleStubIndex = null;
        string dayTag = "";
        DateTime day = DateTime.MinValue;

        if (args.Length > 0)
        {
            // Explicit day requested on the command line.
            if (DateTime.TryParse(args[0], out day))
            {
                day = day.StartOfDay();
                dayTag = "_" + day.ToShortDateString();
                articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();

                long requestedKey = day.ToFileTimeUtc();
                if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == requestedKey))
                {
                    articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(requestedKey, day.ToShortDateString()));
                }
                else
                {
                    // Day already indexed: nothing to save for the index later on.
                    articleStubIndex = null;
                }
            }
        }
        else
        {
            articleStubIndex = repoIndex.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX).FirstOrDefault() ?? new ArticleStubIndex();
            day = DateTime.Now.AddDays(-1);
            day = day.StartOfDay();

            long yesterdayKey = day.ToFileTimeUtc();
            if (!articleStubIndex.ArticleStubPages.Any(x => x.Key == yesterdayKey))
            {
                dayTag = "_" + day.ToShortDateString();
                articleStubIndex.ArticleStubPages.Add(new KeyValuePair<long, string>(yesterdayKey, day.ToShortDateString()));

                var domain = u.PrimaryDomains.OrderBy(x => x.Length).FirstOrDefault();
                if (!string.IsNullOrEmpty(domain) && !domain.StartsWith("beta", StringComparison.Ordinal))
                {
                    var urlDate = day.ToShortDateString().Replace('/', '-');
                    tweet = "Here are the top articles from " + urlDate + " http://" + domain + "/" + urlDate;
                }
            }
            else
            {
                // Yesterday is already archived: fall back to the untagged (current) content.
                articleStubIndex = null;
                day = DateTime.MinValue;
                dayTag = "";
            }
        }

        var groupingResults = CreateGroups(u, day == DateTime.MinValue ? null : (DateTime?)day);
        var existing = repoPage.Query(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag).FirstOrDefault();
        var contentTask = CreateContent(u, groupingResults, existing);
        Console.WriteLine("{0}: Waiting on content for {1}", DateTime.Now, u.TwitterScreenName);

        var continueTask = contentTask.ContinueWith(task =>
        {
            Console.WriteLine("{0}: Content completed for {1}", DateTime.Now, u.TwitterScreenName);

            // Materialized so the sequence is not re-enumerated by the count check and the page.
            var stubs = task.Result.Take(MAX_CONTENT).ToList();
            if (stubs.Count == 0 && string.IsNullOrEmpty(dayTag))
            {
                Console.WriteLine("{0}: No articles found for {1}", DateTime.Now, u.TwitterScreenName);
                return;
            }

            var articleStubPage = new ArticleStubPage(1, stubs);
            if (existing != null && existing.ExcludedArticleStubs.Count > 0)
            {
                // Carry over only those exclusions that still refer to a stub on the new page.
                articleStubPage.ExcludedArticleStubs = existing.ExcludedArticleStubs
                    .Where(e => articleStubPage.ArticleStubs.Contains(e))
                    .ToList();
            }

            Console.WriteLine("{0}: Deleting old data from files from storage for {1}", DateTime.Now, u.TwitterScreenName);
            repoPage.Delete(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag);

            Console.WriteLine("{0}: Storing data in repository for {1}", DateTime.Now, u.TwitterScreenName);
            repoPage.Save(TwitterModel.Instance(u.TwitterScreenName).CONTENT + dayTag, articleStubPage);

            if (articleStubIndex != null)
            {
                repoIndex.Save(TwitterModel.Instance(u.TwitterScreenName).CONTENT_INDEX, articleStubIndex);
            }

            if (!string.IsNullOrEmpty(tweet))
            {
                try
                {
                    TwitterModel.Instance(u.TwitterScreenName).UpdateStatus(tweet, processStatus: false);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("{0}: Could not tweet message: {1}" + Environment.NewLine + "The following exception was thrown: {2}", DateTime.Now, tweet, ex.ToString());
                }
            }
        });

        lock (tasksGate)
        {
            tasks.Add(contentTask);
            tasks.Add(continueTask);
        }
    });

    Task.WaitAll(tasks.ToArray());

    var end = DateTime.Now;
    Console.WriteLine("{0}: Ending and it took {1} minutes to complete", end, (end - start).TotalMinutes);
}