/// <summary>
/// Passes every entry of <paramref name="listOfWords"/> to the single-item
/// <c>Learn</c> overload (with <c>false</c> as its second argument), then
/// recomputes <c>m_mappingDistribution</c> for every key of <c>m_sum</c> as
/// the ratio "counter value / sum for that key".
/// </summary>
/// <param name="listOfWords">Entries to learn from; enumerated once and, if
/// requested, merged into the internal data set afterwards.</param>
/// <param name="appendToInternalDataset">When <c>true</c>,
/// <c>m_pinglishDataSet</c> is replaced by the result of merging it with
/// <paramref name="listOfWords"/> using the <c>NoDuplicatesEntries</c>
/// normalization option.</param>
public void Learn(List<PinglishString> listOfWords, bool appendToInternalDataset)
{
    foreach (var entry in listOfWords)
    {
        Learn(entry, false);
    }

    // NOTE(review): the keys of m_sum appear to be Latin-script characters and the
    // keys of the nested counters their Persian renderings - confirm against the
    // single-item Learn overload, which is what fills these tables.
    foreach (char sourceChar in m_sum.Keys)
    {
        int total = m_sum[sourceChar];
        double denominator = total;

        // Make sure a distribution table exists before the counters are touched.
        if (!m_mappingDistribution.ContainsKey(sourceChar))
        {
            m_mappingDistribution.Add(sourceChar, new Dictionary<string, double>());
        }

        var distribution = m_mappingDistribution[sourceChar];
        var counts = m_counter[sourceChar];

        foreach (string target in counts.Keys)
        {
            // The indexer inserts the key when it is missing and overwrites it otherwise.
            distribution[target] = counts[target] / denominator;
        }
    }

    if (!appendToInternalDataset)
    {
        return;
    }

    m_pinglishDataSet = PinglishConverterUtils.MergePinglishStringLists(
        m_pinglishDataSet,
        listOfWords,
        PinglishStringNormalizationOptions.NoDuplicatesEntries);
}
/// <summary>
/// Builds a mapping from a mapping file and a dictionary file.
/// </summary>
/// <remarks>
/// Steps, in order: load the Pinglish strings from
/// <paramref name="mappingFileName"/>, learn from them, append the list
/// returned by <c>RemoveDuplicates()</c> to <c>m_pinglishDataSet</c>, create
/// the suffix lemmatizer, load <c>_dic</c> from <paramref name="dicPath"/>,
/// and create the word mapper.
/// Initialization is best-effort: an exception in any step is reported and
/// swallowed, so the steps after the failing one are skipped and the fields
/// they would have assigned keep their previous values.
/// </remarks>
/// <param name="mappingFileName">File passed to
/// <c>PinglishConverterUtils.LoadPinglishStrings</c>.</param>
/// <param name="dicPath">Path passed to <c>Tools.LoadList</c> to fill <c>_dic</c>.</param>
/// <param name="pruneType">Forwarded to the <c>WordMapper</c> constructor.</param>
public PinglishMapping(string mappingFileName, string dicPath, PruneType pruneType)
{
    try
    {
        List<PinglishString> list = PinglishConverterUtils.LoadPinglishStrings(mappingFileName);

        // Learn without appending; the de-duplicated entries are added explicitly below.
        Learn(list, false);
        m_pinglishDataSet.AddRange(list.RemoveDuplicates());

        _suffixer = new PersianSuffixLemmatizer(true);

        Tools.LoadList(ref _dic, dicPath);
        _wordMapper = new WordMapper(_dic, pruneType);
    }
    catch (Exception ex)
    {
        // Debug.WriteLine is compiled out of builds without DEBUG, which made a
        // failed initialization completely silent in Release. Trace.WriteLine keeps
        // the best-effort behavior (no rethrow) while still reporting the failure
        // whenever TRACE is defined.
        Trace.WriteLine(ex);
    }
}