public static Stats GenerateStats(List<Tweet> tweetsRaw) { var allTweets = tweetsRaw.OrderBy(t => t.CreatedAt).ToArray(); var stats = new Stats(); if (allTweets.Length == 0) { return stats; } stats.OwnerAccount = allTweets.First().User; // A. GENERAL // A.1. Total tweets stats.TotalTweetCount = allTweets.Length; // A.2. Tweets per day stats.FirstTweet = allTweets.FirstOrDefault(t => t.RetweetedStatus == null); stats.LastTweet = allTweets.LastOrDefault(t => t.RetweetedStatus == null); if (stats.FirstTweet != null && stats.LastTweet != null) { stats.LifeSpan = stats.LastTweet.CreatedAt - stats.FirstTweet.CreatedAt; stats.TweetsPerDay = stats.TotalTweetCount / stats.LifeSpan.TotalDays; } // A.3. Clients used var mostUsedClient = allTweets.GroupBy(t => t.SourceName).OrderByDescending(g => g.Count()).FirstOrDefault(); if (mostUsedClient != null && mostUsedClient.Any()) { stats.MostUsedClientName = mostUsedClient.Key; stats.MostUsedClientAddress = mostUsedClient.First().SourceAddress; stats.MostUsedClientCount = mostUsedClient.Count(); } // B. RETWEETS // B.1. Total retweets stats.RetweetCount = allTweets.Count(t => t.RetweetedStatus != null); // B.2. Retweets per day if (stats.LifeSpan.TotalDays > 0d) { stats.RetweetsPerDay = stats.RetweetCount / stats.LifeSpan.TotalDays; } // B.3. Most retweeted account var retweetedTweets = allTweets.Where(t => t.RetweetedStatus != null).ToArray(); var mostRetweetedAccount = retweetedTweets .Select(t => t.RetweetedStatus) .GroupBy(rts => rts.User.ScreenName) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostRetweetedAccount != null) { stats.MostRetweetedAccountName = mostRetweetedAccount.Key; stats.MostRetweetedAccountCount = mostRetweetedAccount.Count(); } // B.4. Fastest retweet var fastestRetweet = retweetedTweets .Select(t => new { Tweet = t, Delta = t.CreatedAt - t.RetweetedStatus.CreatedAt }) .OrderBy(x => x.Delta) .FirstOrDefault(); if (fastestRetweet != null) { stats.FastestRetweet = fastestRetweet.Tweet; stats.FastestRetweetSpan = fastestRetweet.Delta; } // C. MENTIONS & REPLIES // C.1. Mention count var mentions = allTweets.SelectMany(t => t.Entities.UserMentions).ToArray(); stats.MentionCount = mentions.Length; // C.2. Reply count var replies = mentions.Where(um => um.IsReply).ToArray(); stats.ReplyCount = replies.Length; // C.3. Most mentioned account var mostMentionedAccount = mentions .GroupBy(um => um.ScreenName) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostMentionedAccount != null) { stats.MostMentionedAccountName = mostMentionedAccount.Key; stats.MostMentionedAccountCount = mostMentionedAccount.Count(); } // C.4. Most replied account var mostRepliedAccount = replies .GroupBy(um => um.ScreenName) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostRepliedAccount != null) { stats.MostRepliedAccountName = mostRepliedAccount.Key; stats.MostRepliedAccountCount = mostRepliedAccount.Count(); } // D. HASHTAGS // D.1. Most used hashtag var hashtags = allTweets.SelectMany(t => t.Entities.Hashtags).ToArray(); var mostUsedHashtag = hashtags .GroupBy(ht => ht.Text) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostUsedHashtag != null) { stats.MostUsedHashtagText = mostUsedHashtag.Key; stats.MostUsedHashtagCount = mostUsedHashtag.Count(); } // D.2. Longest hashtag var hashtagsSortedByLength = hashtags .Select(ht => ht.Text) .OrderBy(ht => ht.Length); stats.LongestHastag = hashtagsSortedByLength.LastOrDefault(); // D.3. Shortest hashtag stats.ShortestHastag = hashtagsSortedByLength.FirstOrDefault(); // E. DATE & TIME // E.1. Most tweeted day of week var tweetsGroupedByDay = allTweets .Select(t => t.CreatedAt.DayOfWeek) .GroupBy(d => d) .OrderBy(g => g.Count()) .ToArray(); var mostTweetedDayOfWeek = tweetsGroupedByDay.LastOrDefault(); if (mostTweetedDayOfWeek != null) { stats.MostTweetedDayOfWeekName = mostTweetedDayOfWeek.Key; stats.MostTweetedDayOfWeekCount = mostTweetedDayOfWeek.Count(); } // E.2. Most tweeted day var mostTweetedDay = allTweets .GroupBy(t => t.CreatedAt.Date) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostTweetedDay != null) { stats.MostTweetedDay = mostTweetedDay.Key; stats.MostTweetedDayCount = mostTweetedDay.Count(); } // E.3. Least tweeted day var leastTweetedDay = tweetsGroupedByDay.FirstOrDefault(); if (leastTweetedDay != null) { stats.LeastTweetedDayOfWeekName = leastTweetedDay.Key; stats.LeastTweetedDayCount = leastTweetedDay.Count(); } // E.5. Most tweeted month stats.MonthTweetCounts = allTweets .Select(t => Tuple.Create(t.CreatedAt.Year, t.CreatedAt.Month)) .GroupBy(t => t) .Select(g => new MonthTweetCount { Year = g.Key.Item1, Month = g.Key.Item2, Count = g.Count() }) .ToArray(); stats.MostTweetedMonth = stats.MonthTweetCounts.OrderByDescending(mtc => mtc.Count).FirstOrDefault(); // E.5. Most tweeted hour var tweetsGroupedByHour = allTweets .Select(t => t.CreatedAt.Hour) .GroupBy(d => d) .OrderBy(g => g.Count()); var mostTweetedHour = tweetsGroupedByHour.LastOrDefault(); if (mostTweetedHour != null) { stats.MostTweetedHour = mostTweetedHour.Key; stats.MostTweetedHourCount = mostTweetedHour.Count(); } // E.5. Least tweeted hour var leastTweetedHour = tweetsGroupedByHour.FirstOrDefault(); if (leastTweetedHour != null) { stats.LeastTweetedHour = leastTweetedHour.Key; stats.LeastTweetedHourCount = leastTweetedHour.Count(); } // E.6. Longest time not tweeted var previousTweet = stats.FirstTweet; var startTweet = previousTweet; var endTweet = previousTweet; var longestTime = new TimeSpan(); for (var i = 1; i < allTweets.Length; i++) { var currentTweet = allTweets[i]; var delta = currentTweet.CreatedAt - previousTweet.CreatedAt; if (delta > longestTime) { startTweet = previousTweet; endTweet = currentTweet; longestTime = delta; } previousTweet = currentTweet; } stats.LongestTimeNotTweeted = longestTime; stats.NotTweetedStartDate = startTweet.CreatedAt; stats.NotTweetedEndDate = endTweet.CreatedAt; // F. TEXT ANALYSYS // F.1. Total char count stats.TotalCharCount = allTweets.Sum(t => t.Text.Length); // F.2. Average char count if (allTweets.Length > 0) { stats.AverageCharCount = stats.TotalCharCount / allTweets.Length; } // F.3. Total word count var allWords = allTweets .SelectMany(t => t.Text.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)) .ToArray(); stats.TotalWordCount = allWords.Count(); // F.4. Average word count if (allTweets.Length > 0) { stats.AverageWordCount = stats.TotalWordCount / allTweets.Length; } // F.5. Most used word var mostUsedWord = allWords .Where(w => !"RT".Equals(w)) .GroupBy(w => w).OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostUsedWord != null) { stats.MostUsedWord = mostUsedWord.Key; stats.MostUsedWordCount = mostUsedWord.Count(); } // F.6. Duplicate tweets var mostDuplicatedTweet = allTweets .GroupBy(t => t.Text) .Where(g => g.Count() > 1) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostDuplicatedTweet != null) { stats.MostDuplicatedTweet = mostDuplicatedTweet.Key; stats.MostDuplicatedTweetCount = mostDuplicatedTweet.Count(); } // G. LINKS // G.1. Link count var allUrls = allTweets.SelectMany(t => t.Entities.Urls).ToArray(); stats.LinkCount = allUrls.Count(); // G.2. Most linked URL var mostLinkedUrl = allUrls .Select(u => u.ExpandedUrl) .GroupBy(u => u) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostLinkedUrl != null) { stats.MostLinkedUrl = mostLinkedUrl.Key; stats.MostLinkedUrlCount = mostLinkedUrl.Count(); } // G.3. Most linked domain var mostLinkedDomain = allUrls .Select(u => new Uri(u.ExpandedUrl)) .GroupBy(u => u.Host) .OrderByDescending(g => g.Count()) .FirstOrDefault(); if (mostLinkedDomain != null) { stats.MostLinkedDomain = mostLinkedDomain.Key; stats.MostLinkedDomainCount = mostLinkedDomain.Count(); } // H. 3RD PARTY foreach (var tweet in allTweets) { int count; if (stats.ThirdPartySourceCounts.TryGetValue(tweet.SourceName, out count)) { stats.ThirdPartySourceCounts[tweet.SourceName] = count + 1; } } return stats; }
private string SaveStats(Stats stats) { var id = Guid.NewGuid().ToString("N"); var stream = new MemoryStream(); using (var zipStream = new GZipStream(stream, CompressionMode.Compress, true)) using (var streamWriter = new StreamWriter(zipStream)) { var jsonSerializer = JsonSerializer.Create(new JsonSerializerSettings()); jsonSerializer.Serialize(streamWriter, stats); } stream.Position = 0; using (stream) using (var client = Amazon.AWSClientFactory.CreateAmazonS3Client(AwsAccessKey, AwsSecretAccessKey)) { var request = new PutObjectRequest(); request.WithBucketName("MyTwitterStats") .WithCannedACL(S3CannedACL.PublicRead) .WithKey(id + ".json.gz").InputStream = stream; client.PutObject(request); } return id; }