/// <summary>
        /// Computes the sum of squared differences between the letter values of two
        /// distributions, visiting only the letters present in <paramref name="text"/>.
        /// No square root is taken, so the result is a squared distance.
        /// </summary>
        /// <param name="language">
        /// The reference distribution; its <c>letters</c> collection is indexed with
        /// every key taken from <paramref name="text"/>.
        /// </param>
        /// <param name="text">The distribution based on the text; its keys drive the iteration.</param>
        /// <returns>
        /// The accumulated squared difference, or 0 when <paramref name="text"/> has no letters.
        /// </returns>
        public static double Distance(LetterDistribution language, LetterDistribution text)
        {
            double sumOfSquares = 0;

            foreach (char letter in text.letters.Keys)
            {
                // NOTE(review): language.letters is indexed without checking that the
                // letter exists there — confirm every language covers the text's keys.
                double delta = (double)(text.letters[letter]) - (double)(language.letters[letter]);
                sumOfSquares += delta * delta;
            }

            return sumOfSquares;
        }
        // Example #2
        /// <summary>
        /// Guesses the language of <paramref name="text"/> by comparing its letter
        /// distribution against every entry in <c>languages</c> and keeping the closest one.
        /// </summary>
        /// <param name="text">The text from which the letter distribution to compare is built.</param>
        /// <returns>
        /// The key of the closest entry in <c>languages</c> (e.g. "EN" for English),
        /// or null if no entry produced a distance below <see cref="double.MaxValue"/>.
        /// </returns>
        public static string detect(string text)
        {
            LetterDistribution sample = new LetterDistribution(text);

            string bestCode = null;
            double bestDistance = double.MaxValue;

            foreach (var candidate in languages)
            {
                double current = LetterDistribution.Distance(candidate.Value, sample);

                // Strictly-less comparison: on a tie the earlier entry is kept.
                if (current < bestDistance)
                {
                    bestDistance = current;
                    bestCode = candidate.Key;
                }
            }

            return bestCode;
        }