/// <summary>
/// Creates a new <see cref="PinglishString"/> and fills it with the Persian and
/// English letters of this instance.
/// </summary>
/// <returns>
/// A separate instance whose letter collections contain the same elements, in the
/// same order, as this instance's.
/// </returns>
public PinglishString Clone()
{
    var copy = new PinglishString();

    // Elements are added to the copy's own collections instead of sharing this
    // instance's collection references (assumes the constructor creates fresh,
    // empty collections -- TODO confirm).
    copy.PersianLetters.AddRange(PersianLetters);
    copy.EnglishLetters.AddRange(EnglishLetters);

    return copy;
}
/// <summary>
/// Returns a copy of this instance in which every English letter has been
/// lower-cased. The copy is produced by <see cref="Clone"/>; only the copy's
/// English letters are written to.
/// </summary>
/// <returns>A new <see cref="PinglishString"/> with lower-case English letters.</returns>
public PinglishString ToLower()
{
    PinglishString cloned = Clone();

    for (int i = 0; i < EnglishLetters.Count; i++)
    {
        // Use invariant casing: the culture-sensitive char.ToLower(char) maps
        // 'I' to dotless 'ı' under Turkish/Azeri cultures, which would make the
        // result depend on the machine's current culture.
        cloned.EnglishLetters[i] = char.ToLowerInvariant(EnglishLetters[i]);
    }

    return cloned;
}
/// <summary>
/// Multiplies together the values stored in <c>m_mappingDistribution</c> for each
/// (English letter, Persian letter) pair of <paramref name="s"/>.
/// </summary>
/// <param name="s">The string whose letter pairs are looked up position by position.</param>
/// <returns>
/// The product of the looked-up values (1 when <paramref name="s"/> has length 0),
/// or 0.5 when any exception is thrown during the computation.
/// </returns>
private double GetProbability(PinglishString s)
{
    double product = 1;

    try
    {
        int position = 0;
        while (position < s.Length)
        {
            var english = s.EnglishLetters[position];
            var persian = s.PersianLetters[position];

            var perEnglish = m_mappingDistribution[english];
            product *= perEnglish[persian];

            position++;
        }
    }
    catch (Exception)
    {
        // Best-effort fallback: any failure (for example a pair that is missing
        // from the distribution) discards the partial product and yields 0.5.
        product = 0.5;
    }

    return product;
}
/// <summary>
/// Expands <paramref name="list"/> in place by one letter position: every existing
/// element is replaced by one clone per entry of <paramref name="persianLetters"/>,
/// and <c>Append(entry, englishLetter)</c> is called on each clone.
/// </summary>
/// <param name="list">The candidate strings to expand; modified in place.</param>
/// <param name="englishLetter">The English letter passed to <c>Append</c> for every clone.</param>
/// <param name="persianLetters">
/// The Persian alternatives for <paramref name="englishLetter"/>. Must be sorted
/// based on their weights; clones are generated in exactly this order. When empty,
/// <paramref name="list"/> is left unchanged.
/// </param>
public static void Update(this List<PinglishString> list, char englishLetter, List<string> persianLetters)
{
    // Same guard as the Dictionary<string, double> overload: with no alternatives
    // there is nothing to append, so keep the existing candidates instead of
    // removing all of them.
    if (persianLetters.Count == 0)
    {
        return;
    }

    // Build the expanded candidates separately and swap them in once; removing
    // the head of the list on every iteration shifts all remaining elements and
    // makes the expansion quadratic.
    var expanded = new List<PinglishString>();
    foreach (PinglishString original in list)
    {
        foreach (string value in persianLetters)
        {
            PinglishString fs = original.Clone();
            fs.Append(value, englishLetter);
            expanded.Add(fs);
        }
    }

    list.Clear();
    list.AddRange(expanded);
}
/// <summary>
/// Walks over every letter position of <paramref name="word"/> and, for each
/// position whose prefix and postfix (as returned by <c>GetPrefixForIndex</c> and
/// <c>GetPostfixForIndex</c>) have exactly the requested lengths, passes the English
/// letter, its prefix, its postfix and the corresponding Persian letter to
/// <c>UpdateDictionary</c>.
/// </summary>
/// <param name="word">The aligned English/Persian word to learn from.</param>
/// <param name="prefixGram">Required length of the prefix context.</param>
/// <param name="postfixGram">Required length of the postfix context.</param>
public void LearnWordMapping(PinglishString word, int prefixGram, int postfixGram)
{
    var len = word.EnglishLetters.Count;

    // The English string does not change inside the loop, so read it once.
    var english = word.EnglishString;

    for (int i = 0; i < len; ++i)
    {
        var prefix = GetPrefixForIndex(english, i, prefixGram);
        var postfix = GetPostfixForIndex(english, i, postfixGram);

        // Positions where the returned context does not have exactly the
        // requested length are skipped.
        if (prefix.Length == prefixGram && postfix.Length == postfixGram)
        {
            UpdateDictionary(word.EnglishLetters[i], prefix, postfix, word.PersianLetters[i]);
        }
    }
}
/// <summary>
/// Updates the learned statistics from one aligned word: counts each
/// (English letter, Persian letter) pair in <c>m_counter</c>, counts each English
/// letter in <c>m_sum</c>, feeds the word to <c>m_mappingSequences</c> for a range
/// of prefix/postfix context sizes, and optionally stores the word in
/// <c>m_pinglishDataSet</c>.
/// </summary>
/// <param name="word">The aligned English/Persian word to learn from.</param>
/// <param name="appendToInternalDataset">
/// When <c>true</c>, <paramref name="word"/> is added to <c>m_pinglishDataSet</c>
/// unless it is already contained there.
/// </param>
public void Learn(PinglishString word, bool appendToInternalDataset)
{
    // Per-letter occurrence counts.
    for (int index = 0; index < word.Length; index++)
    {
        char english = word.EnglishLetters[index];
        string persian = word.PersianLetters[index];

        if (!m_counter.ContainsKey(english))
        {
            m_counter.Add(english, new Dictionary<string, int>());
        }

        var perLetter = m_counter[english];
        if (!perLetter.ContainsKey(persian))
        {
            perLetter.Add(persian, 0);
        }

        if (!m_sum.ContainsKey(english))
        {
            m_sum.Add(english, 0);
        }

        perLetter[persian] += 1;
        m_sum[english] += 1;
    }

    // Context learning: prefix sizes 3..0, and for each of them postfix sizes from
    // (5 - prefix size) down to 0, so prefix + postfix never exceeds 5.
    const int LargestPrefixGram = 3;
    const int CombinedGramLimit = 5;

    for (int prefixGram = LargestPrefixGram; prefixGram >= 0; prefixGram--)
    {
        int largestPostfixGram = CombinedGramLimit - prefixGram;
        for (int postfixGram = largestPostfixGram; postfixGram >= 0; postfixGram--)
        {
            m_mappingSequences.LearnWordMapping(word, prefixGram, postfixGram);
        }
    }

    if (!appendToInternalDataset)
    {
        return;
    }

    if (!m_pinglishDataSet.Contains(word))
    {
        m_pinglishDataSet.Add(word);
    }
}
/// <summary>
/// Expands <paramref name="list"/> in place by one letter position: every existing
/// element is replaced by one clone per entry of <paramref name="persianLetters"/>,
/// and <c>Append(entry key, englishLetter)</c> is called on each clone. Clones are
/// generated in descending order of the entries' values.
/// </summary>
/// <param name="list">The candidate strings to expand; modified in place.</param>
/// <param name="englishLetter">The English letter passed to <c>Append</c> for every clone.</param>
/// <param name="persianLetters">
/// The Persian alternatives for <paramref name="englishLetter"/>, keyed by letter
/// with a numeric value used only for ordering. When empty,
/// <paramref name="list"/> is left unchanged.
/// </param>
public static void Update(this List<PinglishString> list, char englishLetter, Dictionary<string, double> persianLetters)
{
    if (persianLetters.Count == 0)
    {
        return;
    }

    // Sort once up front: the ordering does not depend on the element being
    // expanded, and OrderByDescending is a deferred query that would otherwise be
    // re-evaluated for every element of the list.
    var ordered = persianLetters.OrderByDescending(item => item.Value).ToList();

    // Build the expanded candidates separately and swap them in once; removing
    // the head of the list on every iteration shifts all remaining elements and
    // makes the expansion quadratic.
    var expanded = new List<PinglishString>();
    foreach (PinglishString original in list)
    {
        foreach (var value in ordered)
        {
            PinglishString fs = original.Clone();
            fs.Append(value.Key, englishLetter);
            expanded.Add(fs);
        }
    }

    list.Clear();
    list.AddRange(expanded);
}