/// <summary>
/// Trains a dictionary on <c>TranslationTrainingData.AlienLanguage</c> (20 iterations) and, for
/// every word in <c>FWords</c>, prints the three table entries that rank highest according to
/// <c>TranslationComparer</c>, each rendered as <c>key(value)</c> with two decimals.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    TranslationTrainingData corpus = TranslationTrainingData.AlienLanguage;
    IBMModel1Dictionary model = IBMModel1Dictionary.Train(corpus, 20);

    foreach (string foreignWord in corpus.FWords)
    {
        // Snapshot the table row for this word so it can be sorted in place.
        KeyValuePair<string, float>[] ranked = dictionaryRow(model, foreignWord);

        // Sort ascending with the project comparer, then flip to get descending order.
        TranslationComparer ordering = new TranslationComparer();
        Array.Sort(ranked, ordering);
        Array.Reverse(ranked);

        IEnumerable<string> topThree = ranked
            .Take(3)
            .Select(entry => $"{entry.Key}({entry.Value:0.00})");

        string joined = string.Join(", ", topThree);
        Console.WriteLine("(f = {0}): {1}", foreignWord, joined);
    }

    // Copies the table entries stored under the given word into a fresh array.
    KeyValuePair<string, float>[] dictionaryRow(IBMModel1Dictionary source, string word)
    {
        return source.Table[word].ToArray();
    }
}
/// <summary>
/// Builds an <see cref="IBMModel1Dictionary"/> from <paramref name="trainingData"/> by repeatedly
/// re-estimating the table entry of every (f, e) word pair from fractional counts collected
/// over all sentence pairs.
/// </summary>
/// <param name="trainingData">
/// Source of the <c>FWords</c> / <c>EWords</c> vocabularies and of the sentence pairs that are
/// enumerated on every iteration.
/// </param>
/// <param name="iterations">Number of re-estimation passes to run (default 10).</param>
/// <param name="defaultTableValue">
/// Value every (f, e) table entry is initialized with before the first pass (default 1.0).
/// </param>
/// <returns>The dictionary holding the table values after the last pass.</returns>
public static IBMModel1Dictionary Train(
    TranslationTrainingData trainingData,
    int iterations = 10,
    float defaultTableValue = 1.0f)
{
    IBMModel1Dictionary model = new IBMModel1Dictionary();

    // Seed every (f, e) combination with the same starting value.
    foreach (string fWord in trainingData.FWords)
    {
        foreach (string eWord in trainingData.EWords)
        {
            model.Table.InitializeDefautValue(fWord, eWord, defaultTableValue);
        }
    }

    for (int pass = 0; pass < iterations; pass++)
    {
        // s-total(e): per-sentence normalizer for each e word.
        Dictionary<string, float> normalizers = new Dictionary<string, float>();

        // total(f): sum of the fractional counts credited to each f word.
        Dictionary<string, float> totalPerF = new Dictionary<string, float>();

        // count(e|f): fractional counts for each (f, e) pair.
        FETable pairCounts = new FETable();

        // Reset both accumulators to zero for this pass.
        foreach (string fWord in trainingData.FWords)
        {
            totalPerF[fWord] = 0.0f;

            foreach (string eWord in trainingData.EWords)
            {
                pairCounts.InitializeDefautValue(fWord, eWord, 0.0f);
            }
        }

        foreach (SentencePair sentencePair in trainingData)
        {
            // Normalization: for each e word, add up the current table values
            // over all f words of the same sentence.
            foreach (string eWord in sentencePair.EWords)
            {
                float sum = 0.0f;

                foreach (string fWord in sentencePair.FWords)
                {
                    sum += model[fWord, eWord];
                }

                normalizers[eWord] = sum;
            }

            // Counting: credit each (f, e) pair of the sentence with its
            // normalized share, both to the pair count and to the f total.
            foreach (string eWord in sentencePair.EWords)
            {
                foreach (string fWord in sentencePair.FWords)
                {
                    var share = model[fWord, eWord] / normalizers[eWord];

                    pairCounts[fWord][eWord] += share;
                    totalPerF[fWord] += share;
                }
            }
        }

        // Estimate probabilities: each table entry becomes its pair count
        // divided by the total collected for its f word.
        foreach (string fWord in trainingData.FWords)
        {
            foreach (string eWord in trainingData.EWords)
            {
                model[fWord, eWord] = pairCounts[fWord][eWord] / totalPerF[fWord];
            }
        }
    }

    return model;
}