/// <summary>
/// Trains an <c>IBMModel1</c> via <c>EMTrainingMethod.Train</c>, then prints the English
/// words that the trained model aligns to a sample French sentence.
/// </summary>
/// <remarks>
/// The training corpus is created empty here; the commented template inside the method
/// shows how sentence pairs are meant to be added before training.
/// </remarks>
public void RunEMTraining()
        {
            var aligner = new IBMModel1();
            var corpus  = new List<EMTrainingRecord>();

            var englishTokenizer = new EnglishTokenizer();
            var frenchTokenizer  = new FrenchTokenizer();

            // Template for populating the corpus (one record per sentence pair):
            //
            //   corpus.Add(new EMTrainingRecord()
            //   {
            //       InputLang  = frenchTokenizer.Tokenize("[Some French Sentence]"),
            //       OutputLang = englishTokenizer.Tokenize("[Some English Sentence]"),
            //   });

            // NOTE(review): the third argument (20) is presumably the EM iteration cap;
            // confirm against EMTrainingMethod.Train.
            EMTrainingMethod.Train(aligner, corpus, 20);

            string frenchSentence  = "[Some French Sentence]";
            string englishSentence = "[Some English Sentence]";

            string[] frenchTokens  = frenchTokenizer.Tokenize(frenchSentence);
            string[] englishTokens = englishTokenizer.Tokenize(englishSentence);

            // alignedTo[pos] is used below as an index into englishTokens for the
            // French token at position pos.
            int[] alignedTo = aligner.GetAlignment(frenchTokens, englishTokens);

            // Keyed by English token position, so every aligned word is kept once and
            // enumerated in ascending position order.
            var alignedWords = new SortedDictionary<int, string>();
            for (int pos = 0; pos < frenchTokens.Length; ++pos)
            {
                int englishPos = alignedTo[pos];
                alignedWords[englishPos] = englishTokens[englishPos];
            }

            // Flatten the position-ordered words into the array handed to string.Join.
            string[] predictedTokens = new string[alignedWords.Count];
            alignedWords.Values.CopyTo(predictedTokens, 0);

            Console.WriteLine("Original French Sentence: {0}", frenchSentence);
            Console.WriteLine("Predicted English Translation: {0}", string.Join(" ", predictedTokens));
        }
// Example #2
// 0
 /// <summary>
 /// Forwards both models and the training corpus to <c>EMTrainingMethod.Train</c>.
 /// </summary>
 /// <param name="model_f_to_e">First model passed to <c>Train</c>.</param>
 /// <param name="model_e_to_f">Second model passed to <c>Train</c>.</param>
 /// <param name="training_corpus_f_to_e">Training records handed to <c>Train</c> unchanged.</param>
 /// <param name="maxIterations">Iteration limit handed to <c>Train</c> unchanged.</param>
 public void InitializeIBMModels(
     IBMModel2 model_f_to_e,
     IBMModel2 model_e_to_f,
     IEnumerable<EMTrainingRecord> training_corpus_f_to_e,
     int maxIterations)
     => EMTrainingMethod.Train(model_f_to_e, model_e_to_f, training_corpus_f_to_e, maxIterations);