Пример #1
0
        /// <summary>
        /// Prepares the two dictionaries shared by the tests: an "en_US" dictionary seeded with
        /// one word and a "cs_CZ" dictionary configured with accent pairs and seeded with three words.
        /// </summary>
        public DictionaryTest()
        {
            // English dictionary: plain a-z alphabet, a single known word.
            DictionaryLoader englishLoader = new DictionaryLoader(new DefaultDictionaryFileParser());
            char[] englishAlphabet = "abcdefghijklmnopqrstuvwxyz".ToCharArray();
            dictionary = new Dictionary(englishLoader, "en_US", "gen", englishAlphabet);
            dictionary.Add("acres");

            // Maps each unaccented letter to the accented variants it can stand for.
            Dictionary<char, List<char>> accentPairs = new Dictionary<char, List<char>>
            {
                { 'a', new List<char> { 'á' } },
                { 'e', new List<char> { 'é', 'ě' } },
                { 'i', new List<char> { 'í' } },
                { 'o', new List<char> { 'ó' } },
                { 'u', new List<char> { 'ú', 'ů' } },
                { 'y', new List<char> { 'ý' } },
                { 'c', new List<char> { 'č' } },
                { 'd', new List<char> { 'ď' } },
                { 'n', new List<char> { 'ň' } },
                { 'r', new List<char> { 'ř' } },
                { 's', new List<char> { 'š' } },
                { 't', new List<char> { 'ť' } },
                { 'z', new List<char> { 'ž' } }
            };

            // Czech dictionary: extended alphabet, matching word pattern and the accent pairs above.
            DictionaryLoader czechLoader = new DictionaryLoader(new DefaultDictionaryFileParser());
            char[] czechAlphabet = "abcdefghijklmnopqrstuvwxyzáéíóúýčďěňřšťžů".ToCharArray();
            csCZ = new Dictionary(
                czechLoader,
                "cs_CZ",
                "gen",
                czechAlphabet,
                null,
                "[a-záéíóúýčďěňřšťžů]+",
                accentPairs);

            foreach (string czechWord in new string[] { "večeře", "véčera", "věci" })
            {
                csCZ.Add(czechWord);
            }
        }
Пример #2
0
        /// <summary>
        /// Scores every candidate correction with the n-gram model, using the words to the left
        /// and to the right of the misspelled word as context.
        /// </summary>
        /// <param name="word">Misspelled word that supplies the left and right context.</param>
        /// <param name="candidates">Candidate corrections; only the keys (candidate texts) are read here.</param>
        /// <returns>
        /// An evaluation holding one probability per candidate and a flag telling whether any
        /// evaluated n-gram reported a non-zero occurrence count.
        /// </returns>
        public LanguageModelEvaluation EvaluateCandidates(MisspelledWord word, Dictionary<string, double> candidates)
        {
            foundInNgrams = false;
            List<string> leftContext = word.GetLeftContext();

            NgramType type = this.dictionary.GetHighestAvailableNgramCollection(leftContext.Count);

            Dictionary<string, double> probability = new Dictionary<string, double>();
            string[] lcArray = this.GetLeftContext(leftContext, type);
            NgramEvaluation evaluation;
            foreach (KeyValuePair<string, double> option in candidates)
            {
                // A candidate may consist of several space-separated words; the one that touches
                // the left context is its first word.
                // NOTE(review): assumes lcArray has a slot at index leftContext.Count - 1 — confirm against GetLeftContext.
                lcArray[leftContext.Count - 1] = option.Key.Contains(' ') ? option.Key.Split(space).First() : option.Key;

                evaluation = this.dictionary.GetNgramCollection(type).GetProbability(lcArray);
                probability.Add(option.Key, evaluation.Probability);

                if (!foundInNgrams && evaluation.Occurence > 0)
                {
                    foundInNgrams = true;
                }
            }

            List<string> rightContext = word.GetRightContext();
            NgramType secType = this.dictionary.GetHighestAvailableNgramCollection(rightContext.Count);

            // Skip the right-hand pass only when BOTH sides fall back to unigrams (presumably it
            // would just multiply the same unigram probability in a second time). The original
            // condition compared `type` with itself, so a usable right context was ignored
            // whenever the left side was unigram-only.
            if (type != NgramType.Unigram || secType != NgramType.Unigram)
            {
                string[] rcArray = this.GetRightContext(rightContext, secType);
                foreach (KeyValuePair<string, double> option in candidates)
                {
                    // For a multi-word candidate, the word that touches the right context is its last word.
                    rcArray[0] = option.Key.Contains(' ') ? option.Key.Split(space).Last() : option.Key;

                    evaluation = this.dictionary.GetNgramCollection(secType).GetProbability(rcArray);
                    probability[option.Key] *= evaluation.Probability;

                    if (!foundInNgrams && evaluation.Occurence > 0)
                    {
                        foundInNgrams = true;
                    }
                }
            }

            return new LanguageModelEvaluation(probability, foundInNgrams);
        }
Пример #3
0
        /// <summary>
        /// Stores <paramref name="value"/> in the cell addressed by the given row and column keys,
        /// creating the row and/or the cell when they do not exist yet.
        /// </summary>
        /// <param name="rowKey">Key of the row.</param>
        /// <param name="columnKey">Key of the column inside the row.</param>
        /// <param name="value">Value to store; replaces any value already present in the cell.</param>
        public void SetValue(char rowKey, char columnKey, int value)
        {
            // Unknown row: create it already containing the requested cell.
            if (!matrix.ContainsKey(rowKey))
            {
                matrix.Add(rowKey, new Dictionary<char, int> { { columnKey, value } });
                return;
            }

            // Known row: the indexer inserts the cell when it is missing and overwrites it otherwise.
            matrix[rowKey][columnKey] = value;
        }
Пример #4
0
        /// <summary>
        /// Loads the words of the dictionary file into <paramref name="dictionary"/>, expanding
        /// flagged entries through the affix rules when an affix file is available.
        /// </summary>
        /// <param name="dictionary">Dictionary that names the files to read and receives the parsed words.</param>
        internal void ParseDictionary(Dictionary dictionary)
        {
            AffixRules rules = null;
            Encoding encoding = null;

            string affixFile = dictionary.GetFile(DictionaryFileType.Affix);
            if (null != affixFile)
            {
                // Try Utils.DetectEncoding first and fall back to EncodingDetector when it yields nothing.
                encoding = Utils.DetectEncoding(affixFile) ?? EncodingDetector.DetectEncoding(affixFile);
                rules = this.affixParser.Parse(affixFile, encoding);
            }

            // The dictionary file is parsed with the encoding detected for the affix file.
            // NOTE(review): encoding is still null here when there is no affix file — confirm the parser accepts that.
            string fileName = dictionary.GetFile(DictionaryFileType.Dictionary);
            DictionaryWithFlags rawDict = this.parser.Parse(fileName, encoding);

            foreach (DictionaryItemWithFlags item in rawDict)
            {
                // Without affix rules a flagged entry cannot be expanded; add the bare word
                // rather than dereferencing a null rule set.
                if (null == item.Flags || null == rules)
                {
                    dictionary.Add(item.Word);
                }
                else
                {
                    dictionary.AddRange(rules.GetPossibleWords(item));
                }
            }
        }
Пример #5
0
 // todo move
 /// <summary>
 /// Reads the line dictionary file, if the dictionary has one, and adds each of its lines
 /// to <paramref name="dictionary"/> as a word.
 /// </summary>
 /// <param name="dictionary">Dictionary that names the file to read and receives its lines.</param>
 internal void ParseSimpleDictionary(Dictionary dictionary)
 {
     string path = dictionary.GetFile(DictionaryFileType.LineDictionary);
     if (null == path)
     {
         // No line dictionary configured: nothing to load.
         return;
     }

     Encoding detected = EncodingDetector.DetectEncoding(path);
     using (StreamReader input = new StreamReader(path, detected))
     {
         // ReadLine returns null once the end of the stream has been reached.
         string line;
         while ((line = input.ReadLine()) != null)
         {
             dictionary.Add(line);
         }
     }
 }
Пример #6
0
        /// <summary>
        /// Generates every dictionary word reachable from <paramref name="word"/> by a single edit
        /// (substitution, deletion, insertion or transposition of adjacent characters) together
        /// with the probability of that edit.
        /// </summary>
        /// <param name="word">Word to generate candidates for.</param>
        /// <returns>
        /// Candidates keyed by their text. When several edits produce the same candidate,
        /// the highest probability is kept.
        /// </returns>
        public Dictionary<string, double> GeneratePossibleWords(string word)
        {
            Dictionary<string, double> result = new Dictionary<string, double>();

            // substitution: replace the character at position i with each alphabet item
            for (int i = 0; i < word.Length; i++)
            {
                foreach (string charItem in alphabet)
                {
                    string edited = word.Remove(i, 1).Insert(i, charItem);
                    if (dictionary.FindWord(edited))
                    {
                        double prop = this.CalculateProbability(EditOperation.Substitution, word[i], charItem[0]);
                        KeepBestCandidate(result, edited, prop);
                    }
                }
            }

            // deletions: drop the character at position i
            for (int i = 0; i < word.Length; i++)
            {
                string edited = word.Remove(i, 1);
                if (dictionary.FindWord(edited))
                {
                    // The character before the deleted one, or a space at the start of the word
                    // (the same convention the insertion branch uses).
                    char prev = i == 0 ? ' ' : word[i - 1];
                    double prop = this.CalculateProbability(EditOperation.Deletion, prev, word[i]);
                    KeepBestCandidate(result, edited, prop);
                }
            }

            // insertions: put each alphabet item (or a space) at position i
            for (int i = 0; i <= word.Length; i++)
            {
                foreach (string item in alphabetWithSpace)
                {
                    string edited = word.Insert(i, item);

                    // An inserted space splits the word; the split counts as a candidate only
                    // when every resulting part is a dictionary word.
                    bool allPartsKnown = false;
                    if (item == " ")
                    {
                        string tr = edited.Trim();
                        // A leading or trailing space trims back to the original word: not a split.
                        if (tr != word)
                        {
                            foreach (string part in tr.Split(space))
                            {
                                allPartsKnown = dictionary.FindWord(part);
                                if (!allPartsKnown)
                                {
                                    break;
                                }
                            }
                        }
                    }

                    if (allPartsKnown || dictionary.FindWord(edited))
                    {
                        char prev = i == 0 ? ' ' : word[i - 1];
                        double prop = this.CalculateProbability(EditOperation.Insertion, prev, item[0]);
                        KeepBestCandidate(result, edited, prop);
                    }
                }
            }

            // transposition: swap the characters at positions i and i + 1
            for (int i = 0; i < word.Length - 1; i++)
            {
                string charItem = word[i].ToString();
                string edited = word.Remove(i, 1).Insert(i + 1, charItem);
                if (dictionary.FindWord(edited))
                {
                    double prop = this.CalculateProbability(EditOperation.Transposition, word[i], word[i + 1]);
                    KeepBestCandidate(result, edited, prop);
                }
            }

            return result;
        }

        /// <summary>
        /// Records a candidate, keeping the higher probability when the candidate was already recorded.
        /// </summary>
        private static void KeepBestCandidate(Dictionary<string, double> candidates, string candidate, double probability)
        {
            double known;
            if (!candidates.TryGetValue(candidate, out known) || probability > known)
            {
                candidates[candidate] = probability;
            }
        }
Пример #7
0
        /// <summary>
        /// Groups pair strings of the form "x-y" by the first character of the part before the dash.
        /// </summary>
        /// <param name="pairs">Pair strings; each is split on '-' with empty parts discarded.</param>
        /// <returns>
        /// For every leading character, the first characters of the parts paired with it,
        /// in the order the pairs were given.
        /// </returns>
        protected Dictionary<char, List<char>> ParsePairs(string[] pairs)
        {
            char[] separator = { '-' };
            Dictionary<char, List<char>> grouped = new Dictionary<char, List<char>>();

            foreach (string entry in pairs)
            {
                string[] halves = entry.Split(separator, StringSplitOptions.RemoveEmptyEntries);
                char key = halves[0][0];
                char paired = halves[1][0];

                // Reuse the bucket of a key seen before, otherwise start a new one.
                List<char> bucket;
                if (!grouped.TryGetValue(key, out bucket))
                {
                    bucket = new List<char>();
                    grouped.Add(key, bucket);
                }

                bucket.Add(paired);
            }

            return grouped;
        }