/*
 * Look up every word of a tweet in the per-level seed word lists.
 *
 * Each word found in the list of some level gets its seed_word_level set to
 * that level. All levels are checked for every word, so when a word appears
 * in several lists the highest matching level is the one that sticks.
 *
 * Returns the level of the last match encountered while walking the tweet's
 * word_count_table. Returns 0 when nothing matched, which cannot be told
 * apart from a match at level 0.
 */
private int is_exist_seed_words_a_tweet(SDTM_v1_Tweet one_tweet)
{
    int last_matched_level = 0;

    foreach (KeyValuePair<SDTM_v1_Word, int> word_entry in one_tweet.word_count_table)
    {
        SDTM_v1_Word word = word_entry.Key;

        int level = 0;
        while (level < SDTM_v1.numLevels)
        {
            if (this.seed_words_list_level_arr[level].Contains(word.wordidx))
            {
                last_matched_level = level;
                word.seed_word_level = level;
            }
            level++;
        }
    }

    return last_matched_level;
}
/*
 * Read conversations from file into this.convsList and set this.numConvs.
 *
 * The file is consumed as a sequence of conversation records:
 *   - a name line (commented in the original as userid1_userid2_convid);
 *     it is split on '_' and the first two fields are parsed as the user ids
 *   - a line holding the number of tweets in the conversation
 *   - one line per tweet, split on single spaces:
 *       field 0      integer passed to set_tweet_id
 *       fields 1, 2  doubles passed to set_max_ent_prob
 *       field 3      not read (see the note in the tweet loop)
 *       fields 4..   bag-of-words entries of the form wordidx:count
 *
 * All numbers are parsed with the invariant culture, so '.' is always the
 * decimal separator regardless of the regional settings of the machine.
 * (Assumes the data file is written with '.' decimals - confirm with the
 * producer of the file.)
 *
 * Any exception raised while reading or parsing is printed to the console
 * and the process exits with code 1.
 */
private void Read_conversations(string target_file_path)
{
    this.convsList = new ArrayList();

    // Culture-independent number parsing; the parameterless Convert overloads
    // would use the current thread culture instead.
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;
    int conv_idx = 0;

    try
    {
        using (StreamReader sr = new StreamReader(target_file_path))
        {
            string line = null;
            while ((line = sr.ReadLine()) != null)
            {
                // Conversation name, userid1_userid2_convid
                SDTM_v1_Conversation one_conv = new SDTM_v1_Conversation(conv_idx);
                conv_idx++;

                string[] line_arr = line.Split('_');
                one_conv.set_users(
                    Convert.ToInt32(line_arr[0], invariant),
                    Convert.ToInt32(line_arr[1], invariant));

                // Number of tweets in the conversation.
                // Convert.ToInt32 maps a null string to 0, so a file that ends
                // right after a name line yields a conversation with no tweets.
                line = sr.ReadLine();
                int numberofTweets = Convert.ToInt32(line, invariant);

                // Each tweet in a conversation
                for (int tweet_idx = 0; tweet_idx < numberofTweets; tweet_idx++)
                {
                    // Line format is
                    // user_id lambda_0 lambda_1 numberofuniquewords BagofWordsFormat
                    line = sr.ReadLine();
                    line_arr = line.Split(' ');

                    SDTM_v1_Tweet one_tweet = new SDTM_v1_Tweet();
                    one_tweet.set_tweet_id(Convert.ToInt32(line_arr[0], invariant));
                    one_tweet.set_max_ent_prob(
                        Convert.ToDouble(line_arr[1], invariant),
                        Convert.ToDouble(line_arr[2], invariant));

                    // Field 3 (the declared number of unique words) is not
                    // used: word entries are taken to run from index 4 to the
                    // end of the line.
                    Dictionary<SDTM_v1_Word, int> word_count = new Dictionary<SDTM_v1_Word, int>();
                    int word_count_a_tweet = line_arr.Length;
                    for (int word_idx = 4; word_idx < word_count_a_tweet; word_idx++)
                    {
                        // Each entry is wordidx:count
                        string[] one_word_count_arr = line_arr[word_idx].Split(':');
                        SDTM_v1_Word one_word = new SDTM_v1_Word(Convert.ToInt32(one_word_count_arr[0], invariant));
                        word_count.Add(one_word, Convert.ToInt32(one_word_count_arr[1], invariant));
                    }
                    one_tweet.set_word_count(word_count);

                    // insert tweet to conversation
                    one_conv.insert_tweet(one_tweet);
                }

                // insert conversation to list
                this.convsList.Add(one_conv);
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
        Environment.Exit(1);
    }

    this.numConvs = this.convsList.Count;
}
/*
 * Append one tweet to the end of this object's tweet_list.
 */
public void insert_tweet(SDTM_v1_Tweet one_tweet)
{
    tweet_list.Add(one_tweet);
}
/*
 * Read conversations from file.
 *
 * Fills this.convsList with one SDTM_v1_Conversation per record found in
 * target_file_path and stores the resulting count in this.numConvs.
 *
 * Record layout as consumed by this method:
 *   1. name line, split on '_'; fields 0 and 1 are parsed as integers and
 *      passed to set_users
 *   2. a line with the number of tweets that follow
 *   3. one space-separated line per tweet: an integer for set_tweet_id, two
 *      doubles for set_max_ent_prob, one field that is skipped, then zero or
 *      more wordidx:count entries
 *
 * Numeric text is converted with CultureInfo.InvariantCulture rather than the
 * current culture, so values such as "0.25" parse the same way on every
 * machine. (Assumes the file uses '.' as decimal separator - confirm against
 * whatever writes the file.)
 *
 * On any exception the exception is written to the console and the process
 * terminates with exit code 1.
 */
private void Read_conversations(string target_file_path)
{
    this.convsList = new ArrayList();

    // Passed to every Convert call below; without it the conversions depend
    // on the regional settings of the current thread.
    IFormatProvider number_format = System.Globalization.CultureInfo.InvariantCulture;
    int conv_idx = 0;

    try
    {
        using (StreamReader sr = new StreamReader(target_file_path))
        {
            string line = null;
            while ((line = sr.ReadLine()) != null)
            {
                // Conversation name, userid1_userid2_convid
                string[] name_fields = line.Split('_');
                SDTM_v1_Conversation one_conv = new SDTM_v1_Conversation(conv_idx);
                conv_idx++;
                one_conv.set_users(
                    Convert.ToInt32(name_fields[0], number_format),
                    Convert.ToInt32(name_fields[1], number_format));

                // Number of tweets in the conversation. A null line (end of
                // file) converts to 0, leaving the conversation empty.
                line = sr.ReadLine();
                int numberofTweets = Convert.ToInt32(line, number_format);

                // Each tweet in a conversation
                for (int tweet_idx = 0; tweet_idx < numberofTweets; tweet_idx++)
                {
                    // Line format is
                    // user_id lambda_0 lambda_1 numberofuniquewords BagofWordsFormat
                    line = sr.ReadLine();
                    string[] tweet_fields = line.Split(' ');

                    SDTM_v1_Tweet one_tweet = new SDTM_v1_Tweet();
                    one_tweet.set_tweet_id(Convert.ToInt32(tweet_fields[0], number_format));
                    one_tweet.set_max_ent_prob(
                        Convert.ToDouble(tweet_fields[1], number_format),
                        Convert.ToDouble(tweet_fields[2], number_format));

                    // tweet_fields[3] (number of unique words) is deliberately
                    // not consulted; the bag of words is everything from
                    // index 4 up to the end of the line.
                    Dictionary<SDTM_v1_Word, int> word_count = new Dictionary<SDTM_v1_Word, int>();
                    for (int word_idx = 4; word_idx < tweet_fields.Length; word_idx++)
                    {
                        // wordidx:count
                        string[] idx_and_count = tweet_fields[word_idx].Split(':');
                        SDTM_v1_Word one_word = new SDTM_v1_Word(Convert.ToInt32(idx_and_count[0], number_format));
                        word_count.Add(one_word, Convert.ToInt32(idx_and_count[1], number_format));
                    }
                    one_tweet.set_word_count(word_count);

                    // insert tweet to conversation
                    one_conv.insert_tweet(one_tweet);
                }

                // insert conversation to list
                this.convsList.Add(one_conv);
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
        Environment.Exit(1);
    }

    this.numConvs = this.convsList.Count;
}
/*
 * Check existence of seed words in a tweet.
 *
 * Walks the tweet's word_count_table and, for every level below
 * SDTM_v1.numLevels, tests whether the word's wordidx is contained in that
 * level's seed word list. On a hit the word's seed_word_level is overwritten
 * with the level.
 *
 * The return value is the level of the most recent hit, or 0 if there was
 * none; a caller cannot distinguish "no seed word" from "seed word of
 * level 0" by the return value alone.
 */
private int is_exist_seed_words_a_tweet(SDTM_v1_Tweet one_tweet)
{
    int result_level = 0;

    foreach (KeyValuePair<SDTM_v1_Word, int> pair in one_tweet.word_count_table)
    {
        for (int lvl = 0; lvl < SDTM_v1.numLevels; lvl++)
        {
            // Not a seed word of this level: try the next level.
            if (!this.seed_words_list_level_arr[lvl].Contains(pair.Key.wordidx))
            {
                continue;
            }

            result_level = lvl;
            pair.Key.seed_word_level = lvl;
        }
    }

    return result_level;
}