/// <summary>
/// Convenience overload that forwards to the five-argument <c>SelectBests</c>
/// with <c>justFirst</c> set to <c>false</c> and both search thresholds read
/// from <c>PinglishConverterConfig</c>.
/// </summary>
/// <param name="enWord">Word passed through unchanged to the full overload.</param>
/// <param name="sortedDistribution">Distribution passed through unchanged to the full overload.</param>
/// <returns>The list produced by the five-argument overload.</returns>
public List<ResultWord> SelectBests(ResultWord enWord, KeyValuePair<string, double>[][] sortedDistribution)
{
    const bool justFirst = false;
    var probabilityThreshold = PinglishConverterConfig.ThreshHoldForSearchingProb;
    var counterThreshold = PinglishConverterConfig.ThreshHoldForSearchingCounter;

    return SelectBests(enWord, sortedDistribution, justFirst, probabilityThreshold, counterThreshold);
}
/// <summary>
/// Reports the current state of the game.
/// </summary>
/// <returns>
/// <c>GameState.lose</c> when <c>Tries</c> equals 0; otherwise
/// <c>GameState.win</c> when <c>ResultWord</c> contains no '?' character;
/// otherwise <c>GameState.inGame</c>.
/// </returns>
public GameState GetWinner()
{
    // The tries check comes first: with no tries left the word is not inspected at all.
    if (Tries == 0)
    {
        return GameState.lose;
    }

    bool containsQuestionMark = ResultWord.Contains('?');
    return containsQuestionMark ? GameState.inGame : GameState.win;
}
/// <summary>
/// Walks candidate mappings of <paramref name="enWord"/>, starting from
/// <c>new WordMapping(enWord.Word)</c> and expanding each visited mapping through
/// <c>GetNexts</c>, and collects the distinct normalised words they produce.
/// </summary>
/// <param name="enWord">
/// Source word. When its <c>Word</c> is <c>null</c> an empty list is returned.
/// Its <c>Type</c> flags are OR-ed into every produced result.
/// </param>
/// <param name="sortedDistribution">
/// Distribution handed unchanged to <c>WordMapping.GetProb</c>, <c>ToString</c> and <c>GetNexts</c>.
/// </param>
/// <param name="justFirst">
/// When <c>true</c>, the method returns as soon as a dictionary-valid word has been added,
/// and at most one non-dictionary word (the first one encountered) is kept.
/// </param>
/// <param name="probeTreshHold">
/// The search stops once the current mapping's probability is not greater than this value.
/// </param>
/// <param name="counterTreshHold">
/// Upper bound on the iteration counter; the counter starts at 1, so at most
/// <c>counterTreshHold - 1</c> mappings are examined.
/// </param>
/// <returns>The collected results, in the order the mappings were visited.</returns>
public List<ResultWord> SelectBests(
    ResultWord enWord,
    KeyValuePair<string, double>[][] sortedDistribution,
    bool justFirst,
    double probeTreshHold,
    int counterTreshHold)
{
    var list = new List<ResultWord>();
    if (enWord.Word == null)
    {
        return list;
    }

    // Queue keys are (1 - probability).
    // NOTE(review): presumably PriorityQueue dequeues the smallest key first, so the most
    // probable mapping is visited next - confirm against the PriorityQueue implementation.
    var queue = new PriorityQueue<double, WordMapping>();

    // Hashes of mappings that were already enqueued (or are the start mapping).
    var seenMappingHashes = new HashSet<string>();

    // Normalised words that were already examined. A set keeps the membership test
    // constant-time instead of scanning a list on every iteration.
    var examinedWords = new HashSet<string>();

    var currentWordMapping = new WordMapping(enWord.Word);
    seenMappingHashes.Add(currentWordMapping.GetHash());

    bool firstSeen = false;
    int counter = 1;

    while (currentWordMapping.GetProb(sortedDistribution) > probeTreshHold && counter < counterTreshHold)
    {
        string currentWord = Tools.NormalizeString(currentWordMapping.ToString(sortedDistribution));

        // HashSet.Add returns false for a word that was examined before, so every
        // distinct word is checked against the dictionary only once.
        if (currentWord != null && examinedWords.Add(currentWord))
        {
            bool hitted = Tools.IsValidInDictionary(currentWord, _dictionary, _suffixer, _pruneType);
            if (hitted)
            {
                list.Add(new ResultWord(
                    currentWord,
                    (ResultType.Transliterate | ResultType.HittedToDic) | enWord.Type,
                    currentWordMapping.GetProb(sortedDistribution),
                    true));

                if (justFirst)
                {
                    return list;
                }
            }
            else
            {
                // In justFirst mode only the first non-dictionary word is kept.
                if (!justFirst || !firstSeen)
                {
                    list.Add(new ResultWord(
                        currentWord,
                        ResultType.Transliterate | enWord.Type,
                        currentWordMapping.GetProb(sortedDistribution),
                        true));
                }

                firstSeen = true;
            }
        }

        WordMapping[] wordMappings = currentWordMapping.GetNexts(sortedDistribution);
        foreach (WordMapping wordMapping in wordMappings)
        {
            // Add doubles as the "not seen yet" test, so the hash is computed once per successor.
            if (seenMappingHashes.Add(wordMapping.GetHash()))
            {
                queue.Add(new KeyValuePair<double, WordMapping>(
                    1 - wordMapping.GetProb(sortedDistribution),
                    wordMapping));
            }
        }

        if (queue.Count == 0)
        {
            break;
        }

        currentWordMapping = queue.Dequeue().Value;
        counter++;
    }

    return list;
}
/// <summary>
/// Forwards to <c>SuggestWords</c> with <c>removeDuplicates</c> set to <c>true</c>.
/// </summary>
/// <param name="pinglishWord">Word passed through unchanged.</param>
/// <param name="justFirsts">Passed through as the <c>justFirst</c> argument.</param>
/// <returns>The list returned by <c>SuggestWords</c>.</returns>
public List<ResultWord> SuggestFarsiWords(ResultWord pinglishWord, bool justFirsts)
{
    const bool removeDuplicates = true;
    List<ResultWord> suggestions = SuggestWords(pinglishWord, justFirsts, removeDuplicates);
    return suggestions;
}
/// <summary>
/// Builds, for every character position of <c>piWord.Word</c>, a normalised and
/// descending-sorted distribution of character suggestions, passes it to
/// <c>_wordMapper.SelectBests</c>, and merges the outcome with the exact matches
/// returned by <c>SuggestByExactSearchInDataset</c>.
/// </summary>
/// <param name="piWord">
/// The word to process. When its <c>Word</c> is <c>null</c> an empty list is returned.
/// </param>
/// <param name="justFirst">Forwarded to <c>_wordMapper.SelectBests</c>.</param>
/// <param name="removeDuplicates">
/// Not consulted by this implementation; kept so existing callers continue to compile.
/// </param>
/// <returns>
/// The results of <c>_wordMapper.SelectBests</c>, in order. A result whose word equals a
/// normalised exact match is re-created with <c>ResultType.HittedToDic</c> added and its
/// probability increased by 1. Exact matches that were not produced by the mapper are
/// appended at the end with probability 1.0.
/// </returns>
public List<ResultWord> SuggestWords(ResultWord piWord, bool justFirst, bool removeDuplicates)
{
    string pinglishWord = piWord.Word;
    if (pinglishWord == null)
    {
        // Nothing to transliterate; the visible SelectBests also answers a null word with an empty list.
        return new List<ResultWord>();
    }

    int len = pinglishWord.Length;
    var distribution = new KeyValuePair<string, double>[len][];
    var exactWords = SuggestByExactSearchInDataset(pinglishWord);

    // pre[i] and pst[i] are passed pairwise to GetCharSuggs.
    // NOTE(review): presumably the context lengths before/after the current character - confirm.
    // They never change, so they are allocated once rather than per character position.
    int[] pre = { 2, 3, 2, 1, 1, 3, 1, 2, 1, 0, 0, 0, 3, 0, 2, 0, 1, 0 };
    int[] pst = { 3, 2, 2, 4, 3, 1, 2, 1, 1, 5, 4, 3, 0, 2, 0, 1, 0, 0 };

    Dictionary<string, double> charSuggsWithCount = new Dictionary<string, double>();

    for (int index = 0; index < len; ++index)
    {
        charSuggsWithCount.Clear();
        for (int i = 0; i < pre.Length; i++)
        {
            charSuggsWithCount = GetCharSuggs(m_mappingSequences, pinglishWord, index, charSuggsWithCount, pre[i], pst[i]);
        }

        // No Erabs at the beginning of the word.
        if (index == 0)
        {
            charSuggsWithCount = charSuggsWithCount
                .Where(item => !PersianAlphabets.Erabs.Contains(item.Key))
                .ToDictionary(x => x.Key, x => x.Value);
        }

        if (charSuggsWithCount.Count == 0)
        {
            // TODO: Generate every possible mapping
            var map = SingleValueCharMappings.TryGetValue(pinglishWord[index]);
            if (map != null)
            {
                charSuggsWithCount.Add(map.Value.ToString(), 1);
            }

            // TODO: decide what to do when no mapping exists at all; for now the
            // position simply gets an empty distribution.
        }

        // Turn the raw counts into probabilities that sum to 1.
        double sum = 0;
        foreach (KeyValuePair<string, double> kv in charSuggsWithCount)
        {
            sum += kv.Value;
        }

        var normalised = new List<KeyValuePair<string, double>>();
        foreach (KeyValuePair<string, double> kv in charSuggsWithCount)
        {
            normalised.Add(new KeyValuePair<string, double>(kv.Key, kv.Value / sum));
        }

        // Highest probability first. CompareTo is used instead of Math.Sign on the
        // difference because Math.Sign throws ArithmeticException for NaN, which a
        // zero sum (0 / 0) would produce.
        normalised.Sort((left, right) => right.Value.CompareTo(left.Value));
        distribution[index] = normalised.ToArray();
    }

    List<string> listExact = exactWords.Select(v => Tools.NormalizeString(v.PersianString)).ToList();

    var semiFinRes = _wordMapper.SelectBests(piWord, distribution, justFirst);
    var finRes = new List<ResultWord>();

    foreach (var resultWord in semiFinRes)
    {
        // Remove returns true only when the word was among the remaining exact matches,
        // so each exact match is consumed by at most one mapper result.
        if (listExact.Remove(resultWord.Word))
        {
            finRes.Add(new ResultWord(
                resultWord.Word,
                resultWord.Type | ResultType.HittedToDic,
                resultWord.Probability + 1,
                resultWord.IsFinal));
        }
        else
        {
            finRes.Add(resultWord);
        }
    }

    // Exact matches the mapper did not produce are still reported.
    foreach (var exactRest in listExact)
    {
        finRes.Add(new ResultWord(exactRest, ResultType.Transliterate | ResultType.HittedToDic, 1.0, true));
    }

    return finRes;
}