Exemplo n.º 1
0
        /// <summary>
        /// Learns from every entry of <paramref name="listOfWords"/> and then rebuilds the
        /// per-character mapping distribution from the accumulated counters.
        /// </summary>
        /// <remarks>
        /// Each word is passed to the single-word <c>Learn</c> overload with <c>false</c> as its
        /// second argument. Afterwards, for every key of <c>m_sum</c>, each entry of
        /// <c>m_counter[key]</c> is divided by <c>m_sum[key]</c> and stored in
        /// <c>m_mappingDistribution</c>. Existing distribution entries that have no matching
        /// counter entry are left untouched.
        /// NOTE(review): judging by the original local names (enChar / faChar) the outer key is
        /// presumably a Latin character and the inner key a Persian string - confirm.
        /// </remarks>
        /// <param name="listOfWords">The words to learn from; must not be null.</param>
        /// <param name="appendToInternalDataset">
        /// When true, <paramref name="listOfWords"/> is merged into <c>m_pinglishDataSet</c> using
        /// <c>PinglishStringNormalizationOptions.NoDuplicatesEntries</c>.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="listOfWords"/> is null.</exception>
        public void Learn(List<PinglishString> listOfWords, bool appendToInternalDataset)
        {
            if (listOfWords == null)
            {
                throw new ArgumentNullException("listOfWords");
            }

            foreach (var word in listOfWords)
            {
                Learn(word, false);
            }

            foreach (var sumEntry in m_sum)
            {
                char enChar = sumEntry.Key;
                // Widen once so the division below is floating-point, as in the original cast.
                double total = sumEntry.Value;

                // Single lookup: fetch the existing per-character distribution or create it.
                Dictionary<string, double> distribution;
                if (!m_mappingDistribution.TryGetValue(enChar, out distribution))
                {
                    distribution = new Dictionary<string, double>();
                    m_mappingDistribution.Add(enChar, distribution);
                }

                // The indexer setter inserts missing keys, so no placeholder Add is required.
                foreach (var countEntry in m_counter[enChar])
                {
                    distribution[countEntry.Key] = countEntry.Value / total;
                }
            }

            if (appendToInternalDataset)
            {
                m_pinglishDataSet = PinglishConverterUtils.MergePinglishStringLists(
                    m_pinglishDataSet, listOfWords, PinglishStringNormalizationOptions.NoDuplicatesEntries);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a mapping from the Pinglish strings stored in <paramref name="mappingFileName"/>
        /// and prepares the suffix lemmatizer, dictionary and word mapper.
        /// </summary>
        /// <remarks>
        /// The steps run in order: load the strings, learn from them (without appending to the
        /// internal data set), append the de-duplicated strings to <c>m_pinglishDataSet</c>, create
        /// the <c>PersianSuffixLemmatizer</c>, load <c>_dic</c> from <paramref name="dicPath"/> and
        /// create the <c>WordMapper</c>. Any exception raised along the way is written to the debug
        /// output and not rethrown, so the steps after the failing one are skipped and the
        /// corresponding fields keep whatever value they had before.
        /// </remarks>
        /// <param name="mappingFileName">File passed to <c>PinglishConverterUtils.LoadPinglishStrings</c>.</param>
        /// <param name="dicPath">Path passed to <c>Tools.LoadList</c> to fill <c>_dic</c>.</param>
        /// <param name="pruneType">Prune type handed to the <c>WordMapper</c> constructor.</param>
        public PinglishMapping(string mappingFileName, string dicPath, PruneType pruneType)
        {
            try
            {
                List<PinglishString> loadedStrings =
                    PinglishConverterUtils.LoadPinglishStrings(mappingFileName);

                // Learn first; the data set itself is extended explicitly just below.
                Learn(loadedStrings, false);

                var uniqueStrings = loadedStrings.RemoveDuplicates();
                m_pinglishDataSet.AddRange(uniqueStrings);

                _suffixer = new PersianSuffixLemmatizer(true);

                // The dictionary must be loaded before the word mapper is built from it.
                Tools.LoadList(ref _dic, dicPath);
                _wordMapper = new WordMapper(_dic, pruneType);
            }
            catch (Exception error)
            {
                // Best-effort construction: failures are only reported to the debug listeners.
                Debug.WriteLine(error);
            }
        }