public void ExtractHashTags() { Hashtable hashtags = new Hashtable(); d.TweetData tData = new d.TweetData(); d.HahstagData hData = new d.HahstagData(); d.TweetHashtagLinkData tweetHashtagData = new d.TweetHashtagLinkData(); d.TwuserHashtagLinkData twuserHashtagData = new d.TwuserHashtagLinkData(); DataTable dt = tData.GetHashTagTweeets(); string hashtagPattern = @"(?<tag>#[^\s]+)"; int counter = 0; int tweetId, tagId; string twuserId, text, nohtmltext, tag; foreach (DataRow dr in dt.Rows) { Console.WriteLine("tweet# " + (++counter)); tweetId = Convert.ToInt32(dr[0]); twuserId = dr[1].ToString(); text = dr[2].ToString(); nohtmltext = dr[3].ToString(); Match matchPattern = Regex.Match(nohtmltext, hashtagPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled); while (matchPattern.Success) { tag = matchPattern.Groups["tag"].ToString().Trim(); tag = normalize(tag); if (tag != "") { if (!hashtags.Contains(tag)) { tagId = hData.Create(tag); hashtags.Add(tag, tagId); } else { tagId = Convert.ToInt32(hashtags[tag]); } tweetHashtagData.Create(tweetId, tagId); twuserHashtagData.Create(twuserId, tagId); } matchPattern = matchPattern.NextMatch(); } } }
public void Run() { DataTable dtFollowees, dtFollowerHashtags, dtFolloweeHashtags; string followerId, followeeId; string hashtagId; Hashtable hashFollowerTags; int tweetsByFollower, tweetsByFollowee; d.TwuserData uData = new d.TwuserData(); d.TwuserHashtagLinkData uhData = new d.TwuserHashtagLinkData(); d.HahstagOverlapData htovData = new d.HahstagOverlapData(); DataTable dtTwusers = uData.GetFollowers(0); int counter = 1; foreach (DataRow drUser in dtTwusers.Rows) { followerId = drUser[0].ToString(); Console.WriteLine("\nprocessing user " + (counter++) + " of " + dtTwusers.Rows.Count); dtFollowerHashtags = uhData.GetHashtags(followerId); dtFollowees = uData.GetProcessedFollowees(followerId); hashFollowerTags = new Hashtable(); foreach (DataRow drTag1 in dtFollowerHashtags.Rows) { hashFollowerTags.Add(drTag1[0].ToString(), drTag1[1].ToString()); //load follower's hashtags + their tweet counts into hashtable } foreach (DataRow drFollowee in dtFollowees.Rows) { followeeId = drFollowee[0].ToString(); dtFolloweeHashtags = uhData.GetHashtags(followeeId); Console.WriteLine("\tfollowee " + followeeId + "(" + dtFolloweeHashtags.Rows.Count + " hashtags) out of " + dtFollowees.Rows.Count + " followeees"); foreach (DataRow drTag2 in dtFolloweeHashtags.Rows) { hashtagId = drTag2[0].ToString(); if (hashFollowerTags.ContainsKey(hashtagId)) { Console.WriteLine("\t\tadding overlap for tag " + hashtagId); tweetsByFollower = Convert.ToInt32(hashFollowerTags[hashtagId]); tweetsByFollowee = Convert.ToInt32(drTag2[1]); htovData.Create(followerId, followeeId, Convert.ToInt32(hashtagId), tweetsByFollower, tweetsByFollowee); } } } } }