/// <summary>
/// Calculates the distance between a language's letter distribution and the
/// letter distribution of a text.
/// </summary>
/// <remarks>
/// The result is the sum of the squared differences between the two
/// distributions; no square root is taken. Only the letters present in
/// <paramref name="text"/> contribute to the sum. A letter that occurs in the
/// text but has no entry in <paramref name="language"/> is treated as having a
/// language value of zero instead of making the lookup fail.
/// </remarks>
/// <param name="language">The reference distribution of a language.</param>
/// <param name="text">The distribution built from the text being examined.</param>
/// <returns>The sum of squared differences over the letters of <paramref name="text"/>.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="language"/> or <paramref name="text"/> is null.
/// </exception>
public static double Distance(LetterDistribution language, LetterDistribution text)
{
    if (language == null)
    {
        throw new System.ArgumentNullException("language");
    }

    if (text == null)
    {
        throw new System.ArgumentNullException("text");
    }

    double distance2 = 0;

    foreach (char key in text.letters.Keys)
    {
        double textFrequency = (double)(text.letters[key]);

        // The text may contain letters the language table has never seen;
        // count those as zero rather than failing on the missing key.
        double languageFrequency = language.letters.ContainsKey(key)
            ? (double)(language.letters[key])
            : 0.0;

        double sub = textFrequency - languageFrequency;
        distance2 += sub * sub;
    }

    return distance2;
}
/// <summary>
/// Detects the language of a text by comparing its letter distribution with
/// the distribution of every known language and picking the closest one.
/// </summary>
/// <param name="text">The text whose language should be detected.</param>
/// <returns>
/// The language code (e.g. "EN" for English) of the closest language,
/// or null if no language was detected.
/// </returns>
public static string detect(string text)
{
    LetterDistribution sample = new LetterDistribution(text);

    string closestCode = null;
    double closestDistance = double.MaxValue;

    foreach (var candidate in languages)
    {
        double candidateDistance = LetterDistribution.Distance(candidate.Value, sample);

        // Strict comparison: on a tie the language seen first is kept.
        if (candidateDistance < closestDistance)
        {
            closestDistance = candidateDistance;
            closestCode = candidate.Key;
        }
    }

    return closestCode;
}