/// <summary>
/// End-to-end sample: builds an <c>IBMModel2</c>, runs <c>EMTrainingMethod.Train</c> on it,
/// aligns one tokenized sentence pair, and prints the output-language words that the
/// alignment refers to, ordered by their position in the output sentence.
/// </summary>
/// <remarks>
/// The training corpus list is created empty here and the two sentences are placeholder
/// literals; both are expected to be replaced with real data by whoever adapts the sample.
/// NOTE(review): the third argument to <c>Train</c> (20) is presumably the number of EM
/// iterations — confirm against <c>EMTrainingMethod</c>.
/// </remarks>
public void RunEMTraining()
{
    var translation_model = new IBMModel2();
    var corpus = new List<EMTrainingRecord>();
    var english_tokenizer = new EnglishTokenizer();
    var french_tokenizer = new FrenchTokenizer();

    EMTrainingMethod.Train(translation_model, corpus, 20);

    string french_sentence = "[Some French Sentence]";
    string english_sentence = "[Some English Sentence]";

    string[] french_tokens = french_tokenizer.Tokenize(french_sentence);
    string[] english_tokens = english_tokenizer.Tokenize(english_sentence);

    // alignment[k] is used below as an index into english_tokens for input token k.
    int[] alignment = translation_model.GetAlignment(french_tokens, english_tokens);

    // Collect each referenced output word once, keyed by its index in the output sentence.
    var english_word_by_index = new Dictionary<int, string>();
    for (int source_pos = 0; source_pos < french_tokens.Length; ++source_pos)
    {
        int target_pos = alignment[source_pos];
        english_word_by_index[target_pos] = english_tokens[target_pos];
    }

    // Emit the collected words in ascending order of their output-sentence index.
    string[] predicted_tokens = english_word_by_index
        .OrderBy(entry => entry.Key)
        .Select(entry => entry.Value)
        .ToArray();

    Console.WriteLine("Original French Sentence: {0}", french_sentence);
    Console.WriteLine("Predicted English Translation: {0}", string.Join(" ", predicted_tokens));
}
/// <summary>
/// Computes the intersection of two directional alignments: the pairs (j, i) for which
/// the f-to-e alignment maps position j to i AND the e-to-f alignment maps position i
/// back to j.
/// </summary>
/// <param name="model_f_to_e">Model queried as <c>GetAlignment(ws_f, ws_e)</c>.</param>
/// <param name="model_e_to_f">Model queried as <c>GetAlignment(ws_e, ws_f)</c>.</param>
/// <param name="ws_f">Tokens of the "f" sentence (presumably the foreign/French side — confirm with callers).</param>
/// <param name="ws_e">Tokens of the "e" sentence (presumably the English side — confirm with callers).</param>
/// <param name="intersection_points">
/// Receives a new dictionary mapping each index j of the f-to-e alignment array to the
/// index i of the e-to-f alignment array whenever the two alignments agree on the pair.
/// Positions without agreement are absent.
/// </param>
/// <remarks>
/// No m-by-l matrix is materialized: the method returns <c>void</c>, so
/// <paramref name="intersection_points"/> is its only observable result.
/// </remarks>
public void GetAlignmentMatrix(IBMModel2 model_f_to_e, IBMModel2 model_e_to_f, string[] ws_f, string[] ws_e, out Dictionary<int, int> intersection_points)
{
    int[] alignment_f_to_e = model_f_to_e.GetAlignment(ws_f, ws_e);
    int[] alignment_e_to_f = model_e_to_f.GetAlignment(ws_e, ws_f);

    intersection_points = new Dictionary<int, int>();

    for (int j = 0; j < alignment_f_to_e.Length; ++j)
    {
        // Only one i can satisfy alignment_f_to_e[j] == i, so look it up directly
        // instead of scanning every i. The range check keeps values that fall outside
        // the e-to-f array (e.g. negative or too-large indices) from matching or throwing.
        int i = alignment_f_to_e[j];
        if (i >= 0 && i < alignment_e_to_f.Length && alignment_e_to_f[i] == j)
        {
            intersection_points[j] = i;
        }
    }
}