/*
 * Read conversations from file
 *
 * Fills this.convsList with one SDTM_v1_Conversation per record found in
 * target_file_path and stores the resulting count in this.numConvs.
 *
 * Record layout:
 *   userid1_userid2_convid
 *   <number of tweets>
 *   one line per tweet:
 *     user_id lambda_0 lambda_1 numberofuniquewords wordidx:count wordidx:count ...
 *
 * All numbers are parsed with the invariant culture, so a value such as
 * "0.5" is read the same way whatever the machine's regional settings are.
 *
 * Any exception raised while reading (missing file, malformed number,
 * truncated record, ...) is written to the console and the process exits
 * with code 1.
 */
private void Read_conversations(string target_file_path)
{
    this.convsList = new ArrayList();

    // The input is machine-written data: never parse it with the current culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    int conv_idx = 0;

    try
    {
        using (StreamReader sr = new StreamReader(target_file_path))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Conversation name, userid1_userid2_convid
                SDTM_v1_Conversation one_conv = new SDTM_v1_Conversation(conv_idx);
                conv_idx++;
                string[] line_arr = line.Split('_');
                one_conv.set_users(
                    Convert.ToInt32(line_arr[0], invariant),
                    Convert.ToInt32(line_arr[1], invariant));

                // Number of tweets in the conversation.
                // Convert.ToInt32 yields 0 for a null string, so a file that ends
                // right after a conversation name gives a conversation without tweets.
                line = sr.ReadLine();
                int numberofTweets = Convert.ToInt32(line, invariant);

                // Each tweet in a conversation
                for (int tweet_idx = 0; tweet_idx < numberofTweets; tweet_idx++)
                {
                    // Line format is
                    // user_id lambda_0 lambda_1 numberofuniquewords BagofWordsFormat
                    line = sr.ReadLine();
                    if (line == null)
                    {
                        // Report the truncation explicitly instead of failing
                        // with a NullReferenceException on line.Split.
                        throw new System.IO.InvalidDataException(
                            "Unexpected end of file: conversation " + (conv_idx - 1)
                            + " declares " + numberofTweets + " tweets but only "
                            + tweet_idx + " were found.");
                    }

                    line_arr = line.Split(' ');
                    SDTM_v1_Tweet one_tweet = new SDTM_v1_Tweet();
                    one_tweet.set_tweet_id(Convert.ToInt32(line_arr[0], invariant));
                    one_tweet.set_max_ent_prob(
                        Convert.ToDouble(line_arr[1], invariant),
                        Convert.ToDouble(line_arr[2], invariant));

                    // line_arr[3] (numberofuniquewords) is not used: the word
                    // entries are taken from index 4 up to the end of the line.
                    Dictionary<SDTM_v1_Word, int> word_count = new Dictionary<SDTM_v1_Word, int>();
                    for (int word_idx = 4; word_idx < line_arr.Length; word_idx++)
                    {
                        string one_word_count = line_arr[word_idx];
                        if (one_word_count.Length == 0)
                        {
                            // Trailing or doubled spaces produce empty tokens; skip them.
                            continue;
                        }

                        // Each entry is wordidx:count
                        string[] one_word_count_arr = one_word_count.Split(':');
                        SDTM_v1_Word one_word = new SDTM_v1_Word(Convert.ToInt32(one_word_count_arr[0], invariant));
                        word_count.Add(one_word, Convert.ToInt32(one_word_count_arr[1], invariant));
                    }
                    one_tweet.set_word_count(word_count);

                    // insert tweet to conversation
                    one_conv.insert_tweet(one_tweet);
                }

                // insert conversation to list
                this.convsList.Add(one_conv);
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
        Environment.Exit(1);
    }

    this.numConvs = this.convsList.Count;
}
/*
 * Read conversations from file
 *
 * Fills this.convsList with one SDTM_v1_Conversation per record found in
 * target_file_path and stores the resulting count in this.numConvs.
 *
 * Record layout:
 *   userid1_userid2_convid
 *   <number of tweets>
 *   one line per tweet:
 *     user_id lambda_0 lambda_1 numberofuniquewords wordidx:count wordidx:count ...
 *
 * All numbers are parsed with the invariant culture, so a value such as
 * "0.5" is read the same way whatever the machine's regional settings are.
 *
 * Any exception raised while reading (missing file, malformed number,
 * truncated record, ...) is written to the console and the process exits
 * with code 1.
 */
private void Read_conversations(string target_file_path)
{
    this.convsList = new ArrayList();

    // The input is machine-written data: never parse it with the current culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    int conv_idx = 0;

    try
    {
        using (StreamReader sr = new StreamReader(target_file_path))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Conversation name, userid1_userid2_convid
                SDTM_v1_Conversation one_conv = new SDTM_v1_Conversation(conv_idx);
                conv_idx++;
                string[] line_arr = line.Split('_');
                one_conv.set_users(
                    Convert.ToInt32(line_arr[0], invariant),
                    Convert.ToInt32(line_arr[1], invariant));

                // Number of tweets in the conversation.
                // Convert.ToInt32 yields 0 for a null string, so a file that ends
                // right after a conversation name gives a conversation without tweets.
                line = sr.ReadLine();
                int numberofTweets = Convert.ToInt32(line, invariant);

                // Each tweet in a conversation
                for (int tweet_idx = 0; tweet_idx < numberofTweets; tweet_idx++)
                {
                    // Line format is
                    // user_id lambda_0 lambda_1 numberofuniquewords BagofWordsFormat
                    line = sr.ReadLine();
                    if (line == null)
                    {
                        // Report the truncation explicitly instead of failing
                        // with a NullReferenceException on line.Split.
                        throw new System.IO.InvalidDataException(
                            "Unexpected end of file: conversation " + (conv_idx - 1)
                            + " declares " + numberofTweets + " tweets but only "
                            + tweet_idx + " were found.");
                    }

                    line_arr = line.Split(' ');
                    SDTM_v1_Tweet one_tweet = new SDTM_v1_Tweet();
                    one_tweet.set_tweet_id(Convert.ToInt32(line_arr[0], invariant));
                    one_tweet.set_max_ent_prob(
                        Convert.ToDouble(line_arr[1], invariant),
                        Convert.ToDouble(line_arr[2], invariant));

                    // line_arr[3] (numberofuniquewords) is not used: the word
                    // entries are taken from index 4 up to the end of the line.
                    Dictionary<SDTM_v1_Word, int> word_count = new Dictionary<SDTM_v1_Word, int>();
                    for (int word_idx = 4; word_idx < line_arr.Length; word_idx++)
                    {
                        string one_word_count = line_arr[word_idx];
                        if (one_word_count.Length == 0)
                        {
                            // Trailing or doubled spaces produce empty tokens; skip them.
                            continue;
                        }

                        // Each entry is wordidx:count
                        string[] one_word_count_arr = one_word_count.Split(':');
                        SDTM_v1_Word one_word = new SDTM_v1_Word(Convert.ToInt32(one_word_count_arr[0], invariant));
                        word_count.Add(one_word, Convert.ToInt32(one_word_count_arr[1], invariant));
                    }
                    one_tweet.set_word_count(word_count);

                    // insert tweet to conversation
                    one_conv.insert_tweet(one_tweet);
                }

                // insert conversation to list
                this.convsList.Add(one_conv);
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
        Environment.Exit(1);
    }

    this.numConvs = this.convsList.Count;
}
/*
 * Re-samples the (level, topic) assignment of every tweet in one conversation.
 *
 * For each tweet in one_conv.tweet_list:
 *   1. its current assignment is removed from matrixLTW, sumLTW,
 *      one_conv.CLT and one_conv.sumCLT;
 *   2. probTable[topic, level] is filled with an unnormalised weight for
 *      every level and every topic of that level, and the weights are summed;
 *   3. Multinomial_sampling is called with that sum and returns the new
 *      level and topic (presumably drawn from probTable; the helper is not
 *      shown here);
 *   4. the new assignment is stored on the tweet and added to the counters.
 */
private void GibbsSampling_Each_conv(SDTM_v1_Conversation one_conv)
{
    foreach (SDTM_v1_Tweet tweet in one_conv.tweet_list)
    {
        // Step 1: take the tweet's present assignment out of the counters.
        // NOTE(review): the counters move by one per dictionary entry, while the
        // weights below iterate over each entry's count; confirm this matches
        // how the counters are initialised elsewhere.
        int prevLevel = tweet.sd_level;
        int prevTopic = tweet.topic;
        foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
        {
            this.matrixLTW[prevLevel, prevTopic, entry.Key.wordidx]--;
        }
        this.sumLTW[prevLevel, prevTopic] -= tweet.word_count_table.Count;
        one_conv.CLT[prevLevel, prevTopic]--;
        one_conv.sumCLT[prevLevel]--;

        // Step 2: fill the weight table.
        double totalWeight = 0.0;

        // Level 0 uses max_ent_prob[0]; a word with seed_word_level other
        // than -1 always gets betas[2] here.
        double levelFactor = tweet.max_ent_prob[0] / (one_conv.sumCLT[0] + this.sumAlpha[0]);
        double levelSumBeta = this.sumBeta[0];
        int topicCount = numTopics_arr[0];
        for (int topic = 0; topic < topicCount; topic++)
        {
            double denomBase = this.sumLTW[0, topic] + levelSumBeta;
            double drawn = 0;
            double likelihood = 1.0;
            foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
            {
                int priorIdx = (entry.Key.seed_word_level == -1) ? 0 : 2;
                double prior = this.betas[priorIdx];
                double numerBase = this.matrixLTW[0, topic, entry.Key.wordidx] + prior;
                int occurrences = entry.Value;
                for (int k = 0; k < occurrences; k++)
                {
                    likelihood *= (numerBase + k) / (denomBase + drawn);
                    drawn++;
                }
            }

            double weight = (one_conv.CLT[0, topic] + this.alpha) * levelFactor * likelihood;
            totalWeight += weight;
            this.probTable[topic, 0] = weight;
        }

        // Levels 1 and above use max_ent_prob[1]; a word whose seed_word_level
        // equals the level being scored gets betas[1].
        for (int level = 1; level < SDTM_v1.numLevels; level++)
        {
            levelFactor = tweet.max_ent_prob[1] / (one_conv.sumCLT[level] + this.sumAlpha[level]);
            levelSumBeta = this.sumBeta[level];
            topicCount = numTopics_arr[level];
            for (int topic = 0; topic < topicCount; topic++)
            {
                double denomBase = this.sumLTW[level, topic] + levelSumBeta;
                double drawn = 0;
                double likelihood = 1.0;
                foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
                {
                    int priorIdx;
                    if (entry.Key.seed_word_level == -1)
                    {
                        priorIdx = 0;
                    }
                    else if (entry.Key.seed_word_level == level)
                    {
                        priorIdx = 1;
                    }
                    else
                    {
                        priorIdx = 2;
                    }

                    double prior = this.betas[priorIdx];
                    double numerBase = this.matrixLTW[level, topic, entry.Key.wordidx] + prior;
                    int occurrences = entry.Value;
                    for (int k = 0; k < occurrences; k++)
                    {
                        likelihood *= (numerBase + k) / (denomBase + drawn);
                        drawn++;
                    }
                }

                double weight = (one_conv.CLT[level, topic] + this.alpha) * levelFactor * likelihood;
                totalWeight += weight;
                this.probTable[topic, level] = weight;
            }
        }

        // Step 3: multinomial sampling
        int nextLevel;
        int nextTopic;
        Multinomial_sampling(totalWeight, out nextLevel, out nextTopic);

        // Step 4: record the new assignment and put the tweet back into the counters.
        tweet.set_sd_level(nextLevel);
        tweet.set_topic(nextTopic);
        foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
        {
            this.matrixLTW[nextLevel, nextTopic, entry.Key.wordidx]++;
        }
        this.sumLTW[nextLevel, nextTopic] += tweet.word_count_table.Count;
        one_conv.CLT[nextLevel, nextTopic]++;
        one_conv.sumCLT[nextLevel]++;
    }
}
/*
 * Re-samples the (level, topic) assignment of every tweet in one conversation.
 *
 * For each tweet in one_conv.tweet_list:
 *   1. its current assignment is removed from matrixLTW, sumLTW,
 *      one_conv.CLT and one_conv.sumCLT;
 *   2. probTable[topic, level] is filled with an unnormalised weight for
 *      every level and every topic of that level, and the weights are summed;
 *   3. Multinomial_sampling is called with that sum and returns the new
 *      level and topic (presumably drawn from probTable; the helper is not
 *      shown here);
 *   4. the new assignment is stored on the tweet and added to the counters.
 */
private void GibbsSampling_Each_conv(SDTM_v1_Conversation one_conv)
{
    foreach (SDTM_v1_Tweet tweet in one_conv.tweet_list)
    {
        // Step 1: take the tweet's present assignment out of the counters.
        // NOTE(review): the counters move by one per dictionary entry, while the
        // weights below iterate over each entry's count; confirm this matches
        // how the counters are initialised elsewhere.
        int prevLevel = tweet.sd_level;
        int prevTopic = tweet.topic;
        foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
        {
            this.matrixLTW[prevLevel, prevTopic, entry.Key.wordidx]--;
        }
        this.sumLTW[prevLevel, prevTopic] -= tweet.word_count_table.Count;
        one_conv.CLT[prevLevel, prevTopic]--;
        one_conv.sumCLT[prevLevel]--;

        // Step 2: fill the weight table.
        double totalWeight = 0.0;

        // Level 0 uses max_ent_prob[0]; a word with seed_word_level other
        // than -1 always gets betas[2] here.
        double levelFactor = tweet.max_ent_prob[0] / (one_conv.sumCLT[0] + this.sumAlpha[0]);
        double levelSumBeta = this.sumBeta[0];
        int topicCount = numTopics_arr[0];
        for (int topic = 0; topic < topicCount; topic++)
        {
            double denomBase = this.sumLTW[0, topic] + levelSumBeta;
            double drawn = 0;
            double likelihood = 1.0;
            foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
            {
                int priorIdx = (entry.Key.seed_word_level == -1) ? 0 : 2;
                double prior = this.betas[priorIdx];
                double numerBase = this.matrixLTW[0, topic, entry.Key.wordidx] + prior;
                int occurrences = entry.Value;
                for (int k = 0; k < occurrences; k++)
                {
                    likelihood *= (numerBase + k) / (denomBase + drawn);
                    drawn++;
                }
            }

            double weight = (one_conv.CLT[0, topic] + this.alpha) * levelFactor * likelihood;
            totalWeight += weight;
            this.probTable[topic, 0] = weight;
        }

        // Levels 1 and above use max_ent_prob[1]; a word whose seed_word_level
        // equals the level being scored gets betas[1].
        for (int level = 1; level < SDTM_v1.numLevels; level++)
        {
            levelFactor = tweet.max_ent_prob[1] / (one_conv.sumCLT[level] + this.sumAlpha[level]);
            levelSumBeta = this.sumBeta[level];
            topicCount = numTopics_arr[level];
            for (int topic = 0; topic < topicCount; topic++)
            {
                double denomBase = this.sumLTW[level, topic] + levelSumBeta;
                double drawn = 0;
                double likelihood = 1.0;
                foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
                {
                    int priorIdx;
                    if (entry.Key.seed_word_level == -1)
                    {
                        priorIdx = 0;
                    }
                    else if (entry.Key.seed_word_level == level)
                    {
                        priorIdx = 1;
                    }
                    else
                    {
                        priorIdx = 2;
                    }

                    double prior = this.betas[priorIdx];
                    double numerBase = this.matrixLTW[level, topic, entry.Key.wordidx] + prior;
                    int occurrences = entry.Value;
                    for (int k = 0; k < occurrences; k++)
                    {
                        likelihood *= (numerBase + k) / (denomBase + drawn);
                        drawn++;
                    }
                }

                double weight = (one_conv.CLT[level, topic] + this.alpha) * levelFactor * likelihood;
                totalWeight += weight;
                this.probTable[topic, level] = weight;
            }
        }

        // Step 3: multinomial sampling
        int nextLevel;
        int nextTopic;
        Multinomial_sampling(totalWeight, out nextLevel, out nextTopic);

        // Step 4: record the new assignment and put the tweet back into the counters.
        tweet.set_sd_level(nextLevel);
        tweet.set_topic(nextTopic);
        foreach (KeyValuePair<SDTM_v1_Word, int> entry in tweet.word_count_table)
        {
            this.matrixLTW[nextLevel, nextTopic, entry.Key.wordidx]++;
        }
        this.sumLTW[nextLevel, nextTopic] += tweet.word_count_table.Count;
        one_conv.CLT[nextLevel, nextTopic]++;
        one_conv.sumCLT[nextLevel]++;
    }
}