Example #1
0
 /// <summary>
 /// Convenience overload: forwards to the <c>SelectBests</c> overload that also takes a
 /// just-first flag and two search thresholds, passing <c>false</c> for the flag and the
 /// threshold values read from <c>PinglishConverterConfig</c>.
 /// </summary>
 /// <param name="enWord">The word to find candidates for.</param>
 /// <param name="sortedDistribution">Per-position arrays of (string, weight) pairs, passed through unchanged.</param>
 /// <returns>Whatever the forwarded overload returns.</returns>
 public List <ResultWord> SelectBests(
     ResultWord enWord,
     KeyValuePair <string, double>[][] sortedDistribution
     )
 {
     // Do not stop at the first hit; collect every candidate within the configured limits.
     const bool justFirst = false;

     var probabilityLimit = PinglishConverterConfig.ThreshHoldForSearchingProb;
     var counterLimit     = PinglishConverterConfig.ThreshHoldForSearchingCounter;

     return(SelectBests(enWord, sortedDistribution, justFirst, probabilityLimit, counterLimit));
 }
Example #2
0
        /// <summary>
        /// Reports the current state of the game.
        /// </summary>
        /// <returns>
        /// <c>GameState.lose</c> when <c>Tries</c> is zero; otherwise <c>GameState.win</c> when
        /// <c>ResultWord</c> contains no '?' character; otherwise <c>GameState.inGame</c>.
        /// </returns>
        public GameState GetWinner()
        {
            // The zero-tries check takes precedence over the word check.
            // NOTE(review): presumably Tries counts the remaining attempts - confirm.
            if (Tries == 0)
            {
                return(GameState.lose);
            }

            // NOTE(review): '?' presumably marks a letter that has not been revealed yet - confirm.
            bool hasPlaceholder = ResultWord.Contains('?');

            return(hasPlaceholder ? GameState.inGame : GameState.win);
        }
Example #3
0
        /// <summary>
        /// Walks candidate mappings of <paramref name="enWord"/>, starting from the mapping built
        /// from its text and continuing with mappings taken from a priority queue, and collects
        /// one <c>ResultWord</c> per distinct normalized candidate string.
        /// </summary>
        /// <param name="enWord">The source word. When its <c>Word</c> is null an empty list is returned.</param>
        /// <param name="sortedDistribution">Per-position arrays of (string, weight) pairs handed to the mapping's probability, string and successor computations.</param>
        /// <param name="justFirst">
        /// When true, the method returns as soon as a candidate is valid in the dictionary, and at
        /// most one candidate that is not valid in the dictionary (the first one seen) is kept.
        /// </param>
        /// <param name="probeTreshHold">The search stops once the current mapping's probability is not greater than this value.</param>
        /// <param name="counterTreshHold">The search stops once the visit counter (which starts at 1) reaches this value.</param>
        /// <returns>The collected candidates, in the order they were visited.</returns>
        public List <ResultWord> SelectBests(
            ResultWord enWord,
            KeyValuePair <string, double>[][] sortedDistribution,
            bool justFirst,
            double probeTreshHold,
            int counterTreshHold
            )
        {
            var list = new List <ResultWord>();

            if (enWord.Word == null)
            {
                return(list);
            }

            bool firstSeen = false;

            var queue     = new PriorityQueue <double, WordMapping>();
            var beforSeen = new HashSet <string>();

            // Candidate strings already emitted; a set keeps the duplicate check O(1)
            // instead of scanning a list on every iteration.
            var watchList = new HashSet <string>();

            var currentWordMapping = new WordMapping(enWord.Word);

            beforSeen.Add(currentWordMapping.GetHash());

            int counter = 1;

            while (currentWordMapping.GetProb(sortedDistribution) > probeTreshHold && counter < counterTreshHold)
            {
                string currentWord = Tools.NormalizeString(currentWordMapping.ToString(sortedDistribution));

                // HashSet.Add returns false for a string that was already recorded.
                if ((currentWord != null) && watchList.Add(currentWord))
                {
                    bool hitted = Tools.IsValidInDictionary(currentWord, _dictionary, _suffixer, _pruneType);
                    if (hitted)
                    {
                        var newResultWord = new ResultWord(currentWord,
                                                           ((ResultType.Transliterate | ResultType.HittedToDic) | enWord.Type),
                                                           currentWordMapping.GetProb(sortedDistribution), true);
                        list.Add(newResultWord);

                        if (justFirst)
                        {
                            return(list);
                        }
                    }
                    else
                    {
                        // In just-first mode only the first non-dictionary candidate is kept.
                        if ((!justFirst) || (!firstSeen))
                        {
                            list.Add(new ResultWord(currentWord,
                                                    ((ResultType.Transliterate) | enWord.Type),
                                                    currentWordMapping.GetProb(sortedDistribution), true));
                        }
                        firstSeen = true;
                    }
                }

                WordMapping[] wordMappings = currentWordMapping.GetNexts(sortedDistribution);
                foreach (WordMapping wordMapping in wordMappings)
                {
                    // Enqueue each successor at most once, identified by its hash string.
                    if (beforSeen.Add(wordMapping.GetHash()))
                    {
                        // NOTE(review): keying by (1 - probability) presumably makes the queue hand back
                        // the most probable mapping first - confirm against PriorityQueue's ordering.
                        queue.Add(new KeyValuePair <double, WordMapping>(1 - wordMapping.GetProb(sortedDistribution),
                                                                         wordMapping));
                    }
                }

                if (queue.Count == 0)
                {
                    break;
                }

                currentWordMapping = queue.Dequeue().Value;
                counter++;
            }

            return(list);
        }
 /// <summary>
 /// Forwards to <c>SuggestWords</c> with the remove-duplicates argument set to <c>true</c>.
 /// </summary>
 /// <param name="pinglishWord">The word to get suggestions for; passed through unchanged.</param>
 /// <param name="justFirsts">Passed through unchanged as the second argument of <c>SuggestWords</c>.</param>
 /// <returns>The list returned by <c>SuggestWords</c>.</returns>
 public List <ResultWord> SuggestFarsiWords(ResultWord pinglishWord, bool justFirsts)
 {
     const bool removeDuplicates = true;

     var suggestions = SuggestWords(pinglishWord, justFirsts, removeDuplicates);

     return(suggestions);
 }
        /// <summary>
        /// Suggests result words for a Pinglish word. For every character position it gathers
        /// weighted character suggestions, normalizes them into a distribution sorted by
        /// descending weight, and hands the distributions to <c>_wordMapper.SelectBests</c>.
        /// The mapper's results are then merged with the exact matches found in the dataset.
        /// </summary>
        /// <param name="piWord">The input word. When its <c>Word</c> is null an empty list is returned.</param>
        /// <param name="justFirst">Forwarded unchanged to <c>_wordMapper.SelectBests</c>.</param>
        /// <param name="removeDuplicates">Not read by the current implementation; kept so existing callers keep compiling.</param>
        /// <returns>
        /// The mapper's results in their original order, where a result whose word equals a
        /// (normalized) exact match is flagged <c>ResultType.HittedToDic</c> and has its
        /// probability raised by 1, followed by the exact matches the mapper did not produce.
        /// </returns>
        public List <ResultWord> SuggestWords(ResultWord piWord, bool justFirst, bool removeDuplicates)
        {
            string pinglishWord = piWord.Word;
            if (pinglishWord == null)
            {
                // Nothing to work on; without this guard the length lookup below would throw.
                return(new List <ResultWord>());
            }

            int len          = pinglishWord.Length;
            var distribution = new KeyValuePair <string, double> [len][];

            var exactWords = SuggestByExactSearchInDataset(pinglishWord);

            // Argument pairs (pre[i], pst[i]) tried in this order for every position.
            // NOTE(review): presumably the number of context characters before/after the
            // position - confirm against GetCharSuggs.
            int[] pre = { 2, 3, 2, 1, 1, 3, 1, 2, 1, 0, 0, 0, 3, 0, 2, 0, 1, 0 };
            int[] pst = { 3, 2, 2, 4, 3, 1, 2, 1, 1, 5, 4, 3, 0, 2, 0, 1, 0, 0 };

            Dictionary <string, double> charSuggsWithCount = new Dictionary <string, double>();

            for (int index = 0; index < len; ++index)
            {
                charSuggsWithCount.Clear();

                for (int i = 0; i < pre.Length; i++)
                {
                    charSuggsWithCount = GetCharSuggs(m_mappingSequences, pinglishWord, index, charSuggsWithCount, pre[i], pst[i]);
                }

                // No Erabs at the beginning of the word
                if (index == 0)
                {
                    charSuggsWithCount = charSuggsWithCount.Where(item => !PersianAlphabets.Erabs.Contains(item.Key))
                                         .ToDictionary(x => x.Key, x => x.Value);
                }

                if (charSuggsWithCount.Count == 0)
                {
                    // TODO: Generate every possible mapping
                    var map = SingleValueCharMappings.TryGetValue(pinglishWord[index]);
                    if (map != null)
                    {
                        charSuggsWithCount.Add(map.Value.ToString(), 1);
                    }
                    // TODO: when no single-character mapping exists either, this position
                    // ends up with an empty distribution.
                }

                // Normalize the weights so that they sum to 1.
                double sum = 0;
                foreach (KeyValuePair <string, double> kv in charSuggsWithCount)
                {
                    sum += kv.Value;
                }

                var templist = new List <KeyValuePair <string, double> >(charSuggsWithCount.Count);
                foreach (KeyValuePair <string, double> kv in charSuggsWithCount)
                {
                    templist.Add(new KeyValuePair <string, double>(kv.Key, kv.Value / sum));
                }

                // Descending by weight. CompareTo orders NaN (possible when sum is 0) instead of
                // throwing, which Math.Sign on a NaN difference would do.
                templist.Sort((kv1, kv2) => kv2.Value.CompareTo(kv1.Value));
                distribution[index] = templist.ToArray();
            }

            // Exact dataset matches that have not yet been matched to a mapper result.
            List <string> listExact = exactWords.Select(v => Tools.NormalizeString(v.PersianString)).ToList();

            var semiFinRes = _wordMapper.SelectBests(piWord, distribution, justFirst);
            var finRes     = new List <ResultWord>();

            foreach (var resultWord in semiFinRes)
            {
                if (listExact.Contains(resultWord.Word))
                {
                    // Also an exact match: flag it as a dictionary hit and raise its probability by 1.
                    finRes.Add(new ResultWord(resultWord.Word, resultWord.Type | ResultType.HittedToDic, resultWord.Probability + 1,
                                              resultWord.IsFinal));
                    listExact.Remove(resultWord.Word);
                }
                else
                {
                    finRes.Add(resultWord);
                }
            }

            // Append the exact matches the mapper did not produce.
            foreach (var exactRest in listExact)
            {
                finRes.Add(new ResultWord(exactRest, ResultType.Transliterate | ResultType.HittedToDic, 1.0, true));
            }

            return(finRes);
        }