/// <summary>
/// Drains the <c>accounts</c> queue. Before each dequeue the thread sleeps for
/// 2000 ms. Every dequeued account that <c>twuserData.Exists</c> does not report
/// as present has its page source requested through <c>getPageSource</c>; a
/// non-empty result is passed to <c>processTwuser</c>, <c>processFollowing</c>
/// and <c>processTweets</c>, in that order. An empty or null result is reported
/// on the console and the account is skipped.
/// </summary>
public void Run()
{
    // Number of accounts handed to the processors so far; printed zero-based.
    int processedCount = 0;

    while (accounts.Count > 0)
    {
        // The pause happens on every iteration, including ones that end up skipped.
        // NOTE(review): presumably a politeness delay between page requests - confirm.
        Thread.Sleep(2000);

        string account = accounts.Dequeue();
        if (twuserData.Exists(account))
        {
            continue;
        }

        string pageSource = getPageSource("http://twitter.com/" + account, false);
        if (string.IsNullOrEmpty(pageSource))
        {
            Console.WriteLine("ERROR: empty page for " + account);
            continue;
        }

        // The counter is printed first and incremented afterwards.
        Console.WriteLine("Processing account #" + processedCount + ": " + account);
        processedCount++;
        Console.WriteLine("Queue size = " + accounts.Count);

        processTwuser(account, pageSource);
        processFollowing(account, pageSource);
        processTweets(account, pageSource);
    }
}
/// <summary>
/// Walks the rows returned by <c>twuserData.GetSeedIds()</c>, using the first
/// column of each row as the seed account name. For each seed not yet seen in
/// this run, its page source is requested via <c>getPageSource</c> and passed to
/// <c>processTweets</c>; then every followee from <c>twuserData.GetFollowees</c>
/// that has not been seen is fetched and processed, and finally
/// <c>twuserData.MarkFolloweesAdded</c> is called for the seed. An account name
/// is handled at most once per call, whether it appears as a seed or a followee.
/// </summary>
public void Run()
{
    // Account names already handled during this call (seeds and followees alike).
    HashSet<string> seen = new HashSet<string>();
    DataTable seeds = twuserData.GetSeedIds();

    foreach (DataRow seedRow in seeds.Rows)
    {
        string seed = seedRow[0].ToString();

        // Add returns false when the name is already in the set.
        if (!seen.Add(seed))
        {
            continue;
        }

        string seedSource = getPageSource("http://twitter.com/" + seed);
        if (string.IsNullOrEmpty(seedSource))
        {
            Console.WriteLine("ERROR: empty page for " + seed);
            continue;
        }

        // seedCounter is declared outside this method and is not reset here.
        seedCounter++;
        Console.WriteLine("Processing account " + seedCounter + " of " + seeds.Rows.Count + ": " + seed);

        // Original author's note: must be done again - the seed account's tweets
        // must be from the same time as the followees' tweets.
        processTweets(seed, seedSource);

        // Original author's note: only existing accounts are used for this loop,
        // i.e. they have already been processed, so followees exist for them.
        DataTable followees = twuserData.GetFollowees(seed);
        foreach (DataRow followeeRow in followees.Rows)
        {
            string followee = followeeRow[0].ToString();
            if (!seen.Add(followee))
            {
                continue;
            }

            string followeeSource = getPageSource("http://twitter.com/" + followee);
            if (string.IsNullOrEmpty(followeeSource))
            {
                // An empty followee page is skipped without any console message.
                continue;
            }

            if (!twuserData.Exists(followee))
            {
                processTwuser(followee, followeeSource);
            }

            processFollowing(followee, followeeSource);

            // Original author's note: again, same tweets are added to repeat users!
            processTweets(followee, followeeSource);
        }

        twuserData.MarkFolloweesAdded(seed);
    }
}