/// <summary>
/// Reads statuses from the Twitter stream through an <c>EnglishStatusProvider</c> and
/// stores each yielded status, together with its author, via <c>XtractDataContext</c>.
/// </summary>
/// <remarks>
/// Credentials come from the "twitter_user" and "twitter_pass" app settings. The stream
/// provider is asked for at most <c>NUM_STATUSES_TO_PULL</c> statuses. Changes are
/// submitted after every status, so a failure part-way through keeps the rows written so far.
/// A progress line is printed once every <c>UPDATE_EVERY</c> stored tweets.
/// </remarks>
public void Execute()
{
    int count = 0;

    WebResponseBuilder responseBuilder = new WebResponseBuilder();
    string twitter_api_username = ConfigurationManager.AppSettings["twitter_user"];
    string twitter_api_password = ConfigurationManager.AppSettings["twitter_pass"];
    responseBuilder.UseCGICredentials(twitter_api_username, twitter_api_password);

    TwitterStreamStatusProvider provider = new TwitterStreamStatusProvider(responseBuilder);
    provider.YieldThisMany = NUM_STATUSES_TO_PULL;

    Console.Out.WriteLine("Parsing english model trigrams from source data in 'english_data' directory");
    EnglishStatusProvider englishProvider = new EnglishStatusProvider(provider, "english_data");
    englishProvider.Threshold = ENGLISH_THRESHOLD;

    Console.Out.WriteLine("About to start reading from twitter - up to " + NUM_STATUSES_TO_PULL + " statuses.");

    // Dispose the data context even when the stream or a submit throws part-way through.
    // NOTE(review): assumes XtractDataContext is IDisposable (it is used like a LINQ to SQL DataContext).
    using (XtractDataContext db = new XtractDataContext())
    {
        foreach (TwitterStatus status in englishProvider.GetMessages())
        {
            string screen_name = status.user.screen_name;

            // Single lookup instead of Count() followed by First() (two queries per status).
            Twuser user = db.Twusers.FirstOrDefault(u => u.screen_name == screen_name);
            if (user == null)
            {
                // First time this author is seen: create the user row.
                user = Twuser.From(status.user);
                user.english_similarity = status.english_similarity;
                db.Twusers.InsertOnSubmit(user);
            }
            else if (user.english_similarity < status.english_similarity)
            {
                // Keep the highest similarity score observed for this user.
                user.english_similarity = status.english_similarity;
            }

            Tweet tweet = new Tweet();
            tweet.english_similarity = status.english_similarity;
            tweet.screen_name = screen_name;
            tweet.text = status.text;
            tweet.twitter_id = status.id;
            tweet.sample_reason = SampleReason.sample_stream.ToString();
            db.Tweets.InsertOnSubmit(tweet);

            db.SubmitChanges();

            // Report progress once every UPDATE_EVERY tweets. The previous check
            // (count++ > UPDATE_EVERY) never reset, so it logged on every tweet past the threshold.
            count++;
            if (UPDATE_EVERY > 0 && count % UPDATE_EVERY == 0)
            {
                Console.Out.WriteLine("Wrote " + count + " tweets.");
            }
        }
    }
}
/// <summary>
/// Prepares <c>_target</c>: an <c>EnglishStatusProvider</c> over a Twitter stream provider
/// that yields five statuses, reading its data from the "data" directory with a
/// threshold of 0.05.
/// </summary>
/// <remarks>
/// Twitter credentials are taken from the "twitter_user" and "twitter_pass" app settings.
/// </remarks>
public void Setup()
{
    var builder = new WebResponseBuilder();

    // Credentials are configured on the builder before any provider is created from it.
    string apiUser = ConfigurationManager.AppSettings["twitter_user"];
    string apiPassword = ConfigurationManager.AppSettings["twitter_pass"];
    builder.UseCGICredentials(apiUser, apiPassword);

    var streamProvider = new TwitterStreamStatusProvider(builder)
    {
        YieldThisMany = 5
    };

    _target = new EnglishStatusProvider(streamProvider, "data");
    _target.Threshold = 0.05d;
}
// Methods

/// <summary>
/// Reads statuses from the Twitter stream through an <c>EnglishStatusProvider</c>, appends
/// each one to "status_dump.txt" as <c>user_id|username|text</c>, and periodically rewrites
/// "users.txt" with the per-user message counts seen so far.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    int count = 0;

    // FileMode.Create truncates an existing dump. OpenOrCreate kept the old bytes, so a
    // shorter run left stale trailing lines from the previous run at the end of the file.
    using (FileStream statusFile = new FileStream("status_dump.txt", FileMode.Create, FileAccess.Write))
    using (StreamWriter statusWriter = new StreamWriter(statusFile))
    {
        WebResponseBuilder responseBuilder = new WebResponseBuilder();
        string twitter_api_username = ConfigurationManager.AppSettings["twitter_user"];
        string twitter_api_password = ConfigurationManager.AppSettings["twitter_pass"];
        responseBuilder.UseCGICredentials(twitter_api_username, twitter_api_password);

        TwitterStreamStatusProvider provider = new TwitterStreamStatusProvider(responseBuilder);
        provider.YieldThisMany = NUM_STATUSES_TO_PULL;

        Console.Out.WriteLine("About to start reading from twitter - up to " + NUM_STATUSES_TO_PULL + " statuses.");

        //expensive operation..
        EnglishStatusProvider englishProvider = new EnglishStatusProvider(provider, "english_data");

        SortedDictionary<long, int> userMsgCounts = new SortedDictionary<long, int>();
        SortedDictionary<long, string> usernameLookup = new SortedDictionary<long, string>();

        foreach (TwitterStatus status in englishProvider.GetMessages())
        {
            string username = status.user.screen_name;
            long user_id = status.user.id;

            // Both dictionaries are always populated together, so one lookup decides
            // whether this user is new.
            int seenSoFar;
            if (userMsgCounts.TryGetValue(user_id, out seenSoFar))
            {
                userMsgCounts[user_id] = seenSoFar + 1;
            }
            else
            {
                usernameLookup.Add(user_id, username);
                userMsgCounts.Add(user_id, 1);
            }

            // '|' is the field separator and each record is one line, so strip both from the text.
            string text = status.text.Replace("|", " ");
            text = text.Replace("\r\n", " ").Replace("\n", " ").Replace("\r", " ");
            statusWriter.WriteLine(user_id + "|" + username + "|" + text);

            // Rewrite the user report every WRITE_OUT_USERS_EVERY statuses. The previous
            // check (count++ > N, then reset) only fired every N + 2 statuses.
            if (++count >= WRITE_OUT_USERS_EVERY)
            {
                count = 0;
                WriteUserMessageCounts(userMsgCounts, usernameLookup);
            }

            // Flush per status so the dump stays current while the stream is running.
            statusWriter.Flush();
        }
    }
}

/// <summary>
/// Rewrites "users.txt" with one <c>count messages|user_id|username</c> line per user,
/// ordered by ascending message count, then reports the number of users on the console.
/// </summary>
/// <param name="userMsgCounts">Message count per user id.</param>
/// <param name="usernameLookup">Screen name per user id; must contain every key of <paramref name="userMsgCounts"/>.</param>
private static void WriteUserMessageCounts(SortedDictionary<long, int> userMsgCounts, SortedDictionary<long, string> usernameLookup)
{
    using (FileStream userFile = new FileStream("users.txt", FileMode.Create, FileAccess.Write))
    using (StreamWriter userWriter = new StreamWriter(userFile))
    {
        foreach (KeyValuePair<long, int> userMsgCount in userMsgCounts.OrderBy(pair => pair.Value))
        {
            long userId = userMsgCount.Key;
            userWriter.WriteLine(userMsgCount.Value + " messages|" + userId + "|" + usernameLookup[userId]);
        }
    }

    Console.Out.WriteLine("Done writing out " + userMsgCounts.Count + " matching users");
}