/// <summary>
/// Builds the two dictionary instances held by this class: <c>dictionary</c> ("en_US",
/// plain a-z alphabet, containing "acres") and <c>csCZ</c> ("cs_CZ", extended alphabet,
/// a character-class pattern string and an accent table, containing three Czech words).
/// </summary>
public DictionaryTest()
{
    DictionaryLoader englishLoader = new DictionaryLoader(new DefaultDictionaryFileParser());
    char[] englishAlphabet = "abcdefghijklmnopqrstuvwxyz".ToCharArray();
    dictionary = new Dictionary(englishLoader, "en_US", "gen", englishAlphabet);
    dictionary.Add("acres");

    // Base letter -> accented variants handed to the Czech dictionary.
    Dictionary<char, List<char>> accents = new Dictionary<char, List<char>>
    {
        { 'a', new List<char> { 'á' } },
        { 'e', new List<char> { 'é', 'ě' } },
        { 'i', new List<char> { 'í' } },
        { 'o', new List<char> { 'ó' } },
        { 'u', new List<char> { 'ú', 'ů' } },
        { 'y', new List<char> { 'ý' } },
        { 'c', new List<char> { 'č' } },
        { 'd', new List<char> { 'ď' } },
        { 'n', new List<char> { 'ň' } },
        { 'r', new List<char> { 'ř' } },
        { 's', new List<char> { 'š' } },
        { 't', new List<char> { 'ť' } },
        { 'z', new List<char> { 'ž' } }
    };

    DictionaryLoader czechLoader = new DictionaryLoader(new DefaultDictionaryFileParser());
    char[] czechAlphabet = "abcdefghijklmnopqrstuvwxyzáéíóúýčďěňřšťžů".ToCharArray();
    csCZ = new Dictionary(czechLoader, "cs_CZ", "gen", czechAlphabet, null, "[a-záéíóúýčďěňřšťžů]+", accents);

    foreach (string czechWord in new[] { "večeře", "véčera", "věci" })
    {
        csCZ.Add(czechWord);
    }
}
/// <summary>
/// Scores every candidate correction against the n-gram collections of the dictionary,
/// first with the left context of the misspelled word and then, unless both passes would
/// use the unigram collection, with its right context.
/// </summary>
/// <param name="word">Misspelled word supplying the left and right context.</param>
/// <param name="candidates">Candidate corrections; only the keys are read here.</param>
/// <returns>
/// An evaluation holding, per candidate, the left-context probability (multiplied by the
/// right-context probability when that pass runs) and a flag telling whether any looked-up
/// n-gram had a non-zero occurrence count.
/// </returns>
/// <remarks>
/// For a candidate containing a space, the first part is placed into the left-context
/// n-gram and the last part into the right-context n-gram.
/// </remarks>
public LanguageModelEvaluation EvaluateCandidates(MisspelledWord word, Dictionary<string, double> candidates)
{
    foundInNgrams = false;

    List<string> leftContext = word.GetLeftContext();
    NgramType type = this.dictionary.GetHighestAvailableNgramCollection(leftContext.Count);
    Dictionary<string, double> probability = new Dictionary<string, double>();
    string[] lcArray = this.GetLeftContext(leftContext, type);
    NgramEvaluation evaluation;

    foreach (KeyValuePair<string, double> option in candidates)
    {
        // The candidate occupies the last slot of the left-context n-gram.
        lcArray[leftContext.Count - 1] = option.Key.Contains(' ') ? option.Key.Split(space).First() : option.Key;
        evaluation = this.dictionary.GetNgramCollection(type).GetProbability(lcArray);
        probability.Add(option.Key, evaluation.Probability);

        if (!foundInNgrams && evaluation.Occurence > 0)
        {
            foundInNgrams = true;
        }
    }

    List<string> rightContext = word.GetRightContext();
    NgramType secType = this.dictionary.GetHighestAvailableNgramCollection(rightContext.Count);

    // Skip the second pass only when BOTH sides fall back to unigrams. The previous
    // condition tested `type` twice and never looked at `secType`, so an available
    // right-context model was ignored whenever the left side was a unigram.
    bool bothUnigram = type == NgramType.Unigram && secType == NgramType.Unigram;
    if (!bothUnigram)
    {
        string[] rcArray = this.GetRightContext(rightContext, secType);
        foreach (KeyValuePair<string, double> option in candidates)
        {
            // The candidate occupies the first slot of the right-context n-gram.
            rcArray[0] = option.Key.Contains(' ') ? option.Key.Split(space).Last() : option.Key;
            evaluation = this.dictionary.GetNgramCollection(secType).GetProbability(rcArray);
            probability[option.Key] *= evaluation.Probability;

            if (!foundInNgrams && evaluation.Occurence > 0)
            {
                foundInNgrams = true;
            }
        }
    }

    return new LanguageModelEvaluation(probability, foundInNgrams);
}
/// <summary>
/// Stores <paramref name="value"/> in the cell addressed by <paramref name="rowKey"/> and
/// <paramref name="columnKey"/>, creating the row when it does not exist yet and
/// overwriting any value already stored in that cell.
/// </summary>
/// <param name="rowKey">Key of the outer (row) dictionary.</param>
/// <param name="columnKey">Key of the inner (column) dictionary.</param>
/// <param name="value">Value to store.</param>
public void SetValue(char rowKey, char columnKey, int value)
{
    if (!matrix.ContainsKey(rowKey))
    {
        // First value for this row: create the row with its single cell.
        matrix.Add(rowKey, new Dictionary<char, int> { { columnKey, value } });
        return;
    }

    // The indexer setter inserts a missing column and overwrites an existing one.
    matrix[rowKey][columnKey] = value;
}
/// <summary>
/// Loads the words of <paramref name="dictionary"/> from its dictionary file, expanding
/// flagged entries through the affix rules when an affix file is available.
/// </summary>
/// <param name="dictionary">Dictionary that supplies the file paths and receives the words.</param>
/// <remarks>
/// The encoding is detected from the affix file only; when there is no affix file the
/// dictionary file is parsed with a <c>null</c> encoding, exactly as before.
/// </remarks>
internal void ParseDictionary(Dictionary dictionary)
{
    AffixRules rules = null;
    Encoding encoding = null;

    string affixFile = dictionary.GetFile(DictionaryFileType.Affix);
    if (null != affixFile)
    {
        // Try Utils first; fall back to EncodingDetector when it returns null.
        encoding = Utils.DetectEncoding(affixFile) ?? EncodingDetector.DetectEncoding(affixFile);
        rules = this.affixParser.Parse(affixFile, encoding);
    }

    string fileName = dictionary.GetFile(DictionaryFileType.Dictionary);
    DictionaryWithFlags rawDict = this.parser.Parse(fileName, encoding);

    foreach (DictionaryItemWithFlags item in rawDict)
    {
        // Without affix rules a flagged entry cannot be expanded; add the bare word
        // instead of dereferencing a null `rules` (previously a NullReferenceException).
        if (null == item.Flags || null == rules)
        {
            dictionary.Add(item.Word);
        }
        else
        {
            dictionary.AddRange(rules.GetPossibleWords(item));
        }
    }
}
// TODO: move
/// <summary>
/// Reads the line dictionary file of <paramref name="dictionary"/>, if it has one, and adds
/// every line of that file to the dictionary as a word.
/// </summary>
/// <param name="dictionary">Dictionary that supplies the file path and receives the lines.</param>
internal void ParseSimpleDictionary(Dictionary dictionary)
{
    string path = dictionary.GetFile(DictionaryFileType.LineDictionary);
    if (null == path)
    {
        // No line dictionary available: nothing to load.
        return;
    }

    Encoding detected = EncodingDetector.DetectEncoding(path);
    using (StreamReader input = new StreamReader(path, detected))
    {
        string line;
        while ((line = input.ReadLine()) != null)
        {
            dictionary.Add(line);
        }
    }
}
/// <summary>
/// Generates every dictionary word reachable from <paramref name="word"/> by a single edit
/// (substitution, deletion, insertion or transposition of adjacent characters) and pairs
/// it with the probability of that edit.
/// </summary>
/// <param name="word">Word to generate candidates for.</param>
/// <returns>
/// Candidate words mapped to their edit probability. When several edits produce the same
/// candidate, the highest probability is kept. Inserting a space yields a candidate
/// only when every resulting part is found in the dictionary (or the whole string is).
/// </returns>
public Dictionary<string, double> GeneratePossibleWords(string word)
{
    Dictionary<string, double> result = new Dictionary<string, double>();

    // substitution
    for (int i = 0; i < word.Length; i++)
    {
        foreach (string charItem in alphabet)
        {
            string edited = word.Remove(i, 1).Insert(i, charItem);
            if (dictionary.FindWord(edited))
            {
                double prop = this.CalculateProbability(EditOperation.Substitution, word[i], charItem[0]);
                KeepHigherProbability(result, edited, prop);
            }
        }
    }

    // deletions
    for (int i = 0; i < word.Length; i++)
    {
        string edited = word.Remove(i, 1);
        if (dictionary.FindWord(edited))
        {
            // Character preceding the deleted one (space at the start of the word).
            // The previous code used word[i] here, i.e. the deleted character itself.
            char prev = i == 0 ? ' ' : word[i - 1];
            double prop = this.CalculateProbability(EditOperation.Deletion, prev, word[i]);
            KeepHigherProbability(result, edited, prop);
        }
    }

    // insertions
    for (int i = 0; i <= word.Length; i++)
    {
        foreach (string item in alphabetWithSpace)
        {
            string edited = word.Insert(i, item);

            // True when a space was inserted inside the word and every part is a known word.
            bool found = false;
            if (item == " ")
            {
                string tr = edited.Trim();
                if (tr != word)
                {
                    string[] parts = tr.Split(space);
                    foreach (string part in parts)
                    {
                        if (dictionary.FindWord(part))
                        {
                            found = true;
                        }
                        else
                        {
                            found = false;
                            break;
                        }
                    }
                }
            }

            if (found || dictionary.FindWord(edited))
            {
                char prev = i == 0 ? ' ' : word[i - 1];
                double prop = this.CalculateProbability(EditOperation.Insertion, prev, item[0]);
                KeepHigherProbability(result, edited, prop);
            }
        }
    }

    // transposition
    for (int i = 0; i < word.Length - 1; i++)
    {
        // Moving character i one position to the right swaps it with its neighbour.
        string edited = word.Remove(i, 1).Insert(i + 1, word[i].ToString());
        if (dictionary.FindWord(edited))
        {
            double prop = this.CalculateProbability(EditOperation.Transposition, word[i], word[i + 1]);
            KeepHigherProbability(result, edited, prop);
        }
    }

    return result;
}

/// <summary>
/// Records <paramref name="probability"/> for <paramref name="candidate"/> unless an equal
/// or higher probability is already stored for it.
/// </summary>
private static void KeepHigherProbability(Dictionary<string, double> candidates, string candidate, double probability)
{
    double known;
    if (!candidates.TryGetValue(candidate, out known) || probability > known)
    {
        candidates[candidate] = probability;
    }
}
/// <summary>
/// Converts strings of the form "x-y" into a lookup from the first character of the left
/// part to the list of first characters of the right parts, in input order.
/// </summary>
/// <param name="pairs">Entries split on '-'; each must yield at least two non-empty parts.</param>
/// <returns>Map from left-hand character to all right-hand characters seen for it.</returns>
protected Dictionary<char, List<char>> ParsePairs(string[] pairs)
{
    char[] separator = { '-' };
    Dictionary<char, List<char>> map = new Dictionary<char, List<char>>();

    foreach (string entry in pairs)
    {
        string[] halves = entry.Split(separator, StringSplitOptions.RemoveEmptyEntries);
        char key = halves[0][0];
        char mapped = halves[1][0];

        List<char> bucket;
        if (!map.TryGetValue(key, out bucket))
        {
            bucket = new List<char>();
            map.Add(key, bucket);
        }

        bucket.Add(mapped);
    }

    return map;
}