/// <summary>
/// Loads the embedded, gzip-compressed profile of every requested language and registers it
/// through <c>AddLanguageProfile</c>.
/// </summary>
/// <param name="languages">
/// Language names. Each one is combined as <c>ResourceNamePrefix + language + ".bin.gz"</c>
/// to form the manifest resource name looked up in this type's assembly.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="languages"/> is null, or no embedded resource exists for one of the names.
/// </exception>
public void AddLanguages(params string[] languages)
        {
            if (languages == null)
            {
                throw new System.ArgumentNullException(nameof(languages));
            }

            Assembly assembly = GetType().Assembly;

            foreach (string language in languages)
            {
                string resourceName = ResourceNamePrefix + language + ".bin.gz";

                using (Stream stream = assembly.GetManifestResourceStream(resourceName))
                {
                    // GetManifestResourceStream returns null when the resource does not exist.
                    // Previously that null reached the GZipStream constructor, which failed with
                    // an ArgumentNullException that only mentioned "stream". The exception type
                    // is kept for existing callers, but the message now names the resource.
                    if (stream == null)
                    {
                        throw new System.ArgumentNullException(
                            nameof(languages),
                            "Embedded language profile resource '" + resourceName + "' was not found.");
                    }

                    using (Stream decompressedStream = new GZipStream(stream, CompressionMode.Decompress))
                    {
                        LanguageProfile profile = new LanguageProfile();
                        profile.Load(decompressedStream);
                        AddLanguageProfile(profile);
                    }
                }
            }
        }
        /// <summary>
        /// Multiplies every entry of <paramref name="prob"/> by
        /// <c>alpha / BaseFrequency</c> plus the probability recorded for
        /// <paramref name="word"/> under the language at the same index of <c>languages</c>.
        /// Does nothing when the word is null or has no entry in <c>wordLanguageProbabilities</c>.
        /// </summary>
        /// <param name="prob">Per-language values, indexed in the same order as <c>languages</c>; updated in place.</param>
        /// <param name="word">The word whose recorded probabilities are applied.</param>
        /// <param name="alpha">Numerator of the constant weight added to every language's probability.</param>
        private void UpdateProbabilities(double[] prob, string word, double alpha)
        {
            // The null check must come first: dictionary lookups throw on a null key.
            // TryGetValue replaces the former ContainsKey + indexer pair (one lookup instead of two).
            if (word == null || !wordLanguageProbabilities.TryGetValue(word, out var languageProbabilities))
            {
                return;
            }

            double weight = alpha / BaseFrequency;

            for (int i = 0; i < prob.Length; i++)
            {
                // When the language has no entry for this word, TryGetValue leaves the
                // out value at its default (0), matching the original fallback.
                languageProbabilities.TryGetValue(languages[i], out var wordProbability);
                prob[i] *= weight + wordProbability;
            }
        }
Пример #3
0
        /// <summary>
        /// Loads the embedded JSON profile of every requested language, deserializes it into a
        /// <c>LanguageProfile</c> and registers it through <c>AddLanguageProfile</c>.
        /// </summary>
        /// <param name="languages">
        /// Language names. Each one is appended to <c>ResourceNamePrefix</c> to form the
        /// manifest resource name looked up in this type's assembly.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="languages"/> is null, or no embedded resource exists for one of the names.
        /// </exception>
        /// <exception cref="InvalidDataException">A resource deserialized to null.</exception>
        public void AddLanguages(params string[] languages)
        {
            if (languages == null)
            {
                throw new System.ArgumentNullException(nameof(languages));
            }

            Assembly assembly = GetType().GetTypeInfo().Assembly;

            foreach (string language in languages)
            {
                string resourceName = ResourceNamePrefix + language;

                using (Stream stream = assembly.GetManifestResourceStream(resourceName))
                {
                    // GetManifestResourceStream returns null for an unknown resource. The
                    // StreamReader constructor used to reject that null with a message that
                    // only mentioned "stream"; the type is kept, the message names the resource.
                    if (stream == null)
                    {
                        throw new System.ArgumentNullException(
                            nameof(languages),
                            "Embedded language profile resource '" + resourceName + "' was not found.");
                    }

                    using (StreamReader sr = new StreamReader(stream))
                    {
                        LanguageProfile profile = System.Text.Json.JsonSerializer.Deserialize<LanguageProfile>(sr.ReadToEnd());

                        // The deserializer yields null for a JSON "null" document. Stop here so a
                        // null profile is never handed to AddLanguageProfile.
                        if (profile == null)
                        {
                            throw new InvalidDataException(
                                "Embedded language profile resource '" + resourceName + "' did not contain a profile.");
                        }

                        AddLanguageProfile(profile);
                    }
                }
            }
        }
        /// <summary>
        /// Appends <paramref name="profile"/> to <c>languages</c> and records, for each word in
        /// its frequency table, the word's relative frequency within that profile.
        /// </summary>
        /// <param name="profile">The profile whose <c>Frequencies</c> and <c>WordCount</c> are read.</param>
        private void AddLanguageProfile(LanguageProfile profile)
        {
            languages.Add(profile);

            // Iterate key/value pairs so the frequency is not looked up again by key.
            foreach (var entry in profile.Frequencies)
            {
                string word = entry.Key;

                // Single lookup instead of ContainsKey followed by the indexer. Every word gets
                // an entry (possibly left empty), exactly as before.
                if (!wordLanguageProbabilities.TryGetValue(word, out var languageProbabilities))
                {
                    languageProbabilities = new Dictionary<LanguageProfile, double>();
                    wordLanguageProbabilities[word] = languageProbabilities;
                }

                // Only words of length 1..NGramLength receive a probability; WordCount is
                // indexed by word length minus one.
                if (word.Length >= 1 && word.Length <= NGramLength)
                {
                    languageProbabilities[profile] = (double)entry.Value / profile.WordCount[word.Length - 1];
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Loads the embedded JSON profile of every requested language, copies its
        /// <c>name</c>, <c>freq</c> and <c>n_words</c> members into a new
        /// <c>LanguageProfile</c>, and registers it through <c>AddLanguageProfile</c>.
        /// </summary>
        /// <param name="languages">
        /// Language names. Each one is appended to <c>ResourceNamePrefix</c> to form the
        /// manifest resource name looked up in this type's assembly.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="languages"/> is null, or no embedded resource exists for one of the names.
        /// </exception>
        /// <exception cref="InvalidDataException">A resource deserialized to null.</exception>
        public void AddLanguages(params string[] languages)
        {
            if (languages == null)
            {
                throw new System.ArgumentNullException(nameof(languages));
            }

            Assembly assembly = GetType().Assembly;

            foreach (string language in languages)
            {
                string resourceName = ResourceNamePrefix + language;

                using (Stream stream = assembly.GetManifestResourceStream(resourceName))
                {
                    // GetManifestResourceStream returns null for an unknown resource. The
                    // StreamReader constructor used to reject that null with a message that
                    // only mentioned "stream"; the type is kept, the message names the resource.
                    if (stream == null)
                    {
                        throw new System.ArgumentNullException(
                            nameof(languages),
                            "Embedded language profile resource '" + resourceName + "' was not found.");
                    }

                    using (var reader = new StreamReader(stream))
                    {
                        string json = reader.ReadToEnd();
                        JsonLanguageProfile jsonProfile = JsonConvert.DeserializeObject<JsonLanguageProfile>(json);

                        // An empty or "null" document deserializes to null; report it instead of
                        // failing with a NullReferenceException on the member reads below.
                        if (jsonProfile == null)
                        {
                            throw new InvalidDataException(
                                "Embedded language profile resource '" + resourceName + "' did not contain a profile.");
                        }

                        LanguageProfile profile = new LanguageProfile();
                        profile.Code        = jsonProfile.name;
                        profile.Frequencies = jsonProfile.freq;
                        profile.WordCount   = jsonProfile.n_words;

                        AddLanguageProfile(profile);
                    }
                }
            }
        }
        /// <summary>
        /// Appends <paramref name="profile"/> to <c>languages</c> and records, for each word in
        /// its frequency table, the word's relative frequency within that profile.
        /// </summary>
        /// <param name="profile">The profile whose <c>Frequencies</c> and <c>WordCount</c> are read.</param>
        private void AddLanguageProfile(LanguageProfile profile)
        {
            languages.Add(profile);

            foreach (string word in profile.Frequencies.Keys)
            {
                // One TryGetValue replaces ContainsKey plus a later indexer read. Every word
                // still gets an entry, even when no probability is stored for it below.
                if (!wordLanguageProbabilities.TryGetValue(word, out var perLanguage))
                {
                    perLanguage = new Dictionary<LanguageProfile, double>();
                    wordLanguageProbabilities[word] = perLanguage;
                }

                // Only words of length 1..NGramLength receive a probability; WordCount is
                // indexed by word length minus one.
                if (word.Length >= 1 && word.Length <= NGramLength)
                {
                    double prob = (double)profile.Frequencies[word] / profile.WordCount[word.Length - 1];
                    perLanguage[profile] = prob;
                }
            }
        }
        /// <summary>
        /// Reads the gzip-compressed profile embedded for each requested language, loads it into
        /// a new <c>LanguageProfile</c> and registers it through <c>AddLanguageProfile</c>.
        /// </summary>
        /// <param name="languages">
        /// Language names. The manifest resource name for each is
        /// <c>ResourceNamePrefix + language + ".bin.gz"</c>, resolved in this type's assembly.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="languages"/> is null, or no embedded resource exists for one of the names.
        /// </exception>
        public void AddLanguages(params string[] languages)
        {
            if (languages == null)
            {
                throw new System.ArgumentNullException(nameof(languages));
            }

            Assembly assembly = GetType().Assembly;

            foreach (string language in languages)
            {
                string resourceName = ResourceNamePrefix + language + ".bin.gz";

                using (Stream stream = assembly.GetManifestResourceStream(resourceName))
                {
                    // A missing resource comes back as null, not as an exception. Fail here
                    // with the resource name rather than letting the GZipStream constructor
                    // complain about a null "stream"; the exception type is unchanged.
                    if (stream == null)
                    {
                        throw new System.ArgumentNullException(
                            nameof(languages),
                            "Embedded language profile resource '" + resourceName + "' was not found.");
                    }

                    using (Stream decompressedStream = new GZipStream(stream, CompressionMode.Decompress))
                    {
                        LanguageProfile profile = new LanguageProfile();
                        profile.Load(decompressedStream);
                        AddLanguageProfile(profile);
                    }
                }
            }
        }