Ejemplo n.º 1
0
        /// <summary>
        /// Walks every row returned by <c>TweetData.GetHashTagTweeets()</c>, finds each
        /// '#'-prefixed token in column 3 of the row, normalizes it, and records a
        /// tweet-to-hashtag link and a user-to-hashtag link for it.
        /// </summary>
        /// <remarks>
        /// Column 0 is read as the tweet id, column 1 as the user id and column 3 as the
        /// text to scan (presumably the HTML-stripped tweet body; confirm against the query
        /// behind <c>GetHashTagTweeets</c>). Within a single call, <c>HahstagData.Create</c>
        /// is invoked once per distinct normalized tag; the id it returns is cached and
        /// reused for later occurrences. A progress line is written to the console per row.
        /// </remarks>
        public void ExtractHashTags()
        {
            // Normalized tag -> id returned by HahstagData.Create. Typed dictionary avoids
            // boxing the ids and the Convert.ToInt32 round-trip on every cache hit.
            // Fully qualified because the file's using directives are not visible here.
            System.Collections.Generic.Dictionary<string, int> hashtags =
                new System.Collections.Generic.Dictionary<string, int>();

            d.TweetData             tData             = new d.TweetData();
            d.HahstagData           hData             = new d.HahstagData();
            d.TweetHashtagLinkData  tweetHashtagData  = new d.TweetHashtagLinkData();
            d.TwuserHashtagLinkData twuserHashtagData = new d.TwuserHashtagLinkData();
            DataTable dt = tData.GetHashTagTweeets();

            // Built once and reused for every row; pattern and options are unchanged.
            Regex hashtagRegex = new Regex(@"(?<tag>#[^\s]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);

            int counter = 0;

            foreach (DataRow dr in dt.Rows)
            {
                Console.WriteLine("tweet# " + (++counter));

                int    tweetId    = Convert.ToInt32(dr[0]);
                string twuserId   = dr[1].ToString();
                string nohtmltext = dr[3].ToString();

                for (Match m = hashtagRegex.Match(nohtmltext); m.Success; m = m.NextMatch())
                {
                    string tag = normalize(m.Groups["tag"].ToString().Trim());

                    // Tags that normalize to an empty string are skipped.
                    if (tag == "")
                    {
                        continue;
                    }

                    // Single lookup: reuse the cached id, or create the tag and remember its id.
                    int tagId;
                    if (!hashtags.TryGetValue(tag, out tagId))
                    {
                        tagId = hData.Create(tag);
                        hashtags.Add(tag, tagId);
                    }

                    tweetHashtagData.Create(tweetId, tagId);
                    twuserHashtagData.Create(twuserId, tagId);
                }
            }
        }